From 65b7236899c27a1cbb9b136bdc94ae4f32e412b2 Mon Sep 17 00:00:00 2001 From: Jopo-JP <21254390+Jopo-JP@users.noreply.github.com> Date: Sun, 13 Jul 2025 15:49:40 +0200 Subject: [PATCH 01/10] feat: Add support for new gemini embedding model. (#5621) --- src/core/webview/webviewMessageHandler.ts | 25 +++++++------ .../embedders/__tests__/gemini.spec.ts | 36 +++++++++++++++++++ src/services/code-index/embedders/gemini.ts | 11 ++++-- .../code-index/embedders/openai-compatible.ts | 29 ++++++++++----- .../code-index/interfaces/embedder.ts | 2 +- src/shared/embeddingModels.ts | 3 ++ 6 files changed, 84 insertions(+), 22 deletions(-) diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index e70b39df8f..283ba7da6c 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -2065,11 +2065,14 @@ export const webviewMessageHandler = async ( } case "requestIndexingStatus": { - const status = provider.codeIndexManager!.getCurrentStatus() - provider.postMessageToWebview({ - type: "indexingStatusUpdate", - values: status, - }) + const manager = provider.codeIndexManager + if (manager) { + const status = manager.getCurrentStatus() + provider.postMessageToWebview({ + type: "indexingStatusUpdate", + values: status, + }) + } break } case "requestCodeIndexSecretStatus": { @@ -2094,8 +2097,8 @@ export const webviewMessageHandler = async ( } case "startIndexing": { try { - const manager = provider.codeIndexManager! - if (manager.isFeatureEnabled && manager.isFeatureConfigured) { + const manager = provider.codeIndexManager + if (manager && manager.isFeatureEnabled && manager.isFeatureConfigured) { if (!manager.isInitialized) { await manager.initialize(provider.contextProxy) } @@ -2109,9 +2112,11 @@ export const webviewMessageHandler = async ( } case "clearIndexData": { try { - const manager = provider.codeIndexManager! 
- await manager.clearIndexData() - provider.postMessageToWebview({ type: "indexCleared", values: { success: true } }) + const manager = provider.codeIndexManager + if (manager) { + await manager.clearIndexData() + provider.postMessageToWebview({ type: "indexCleared", values: { success: true } }) + } } catch (error) { provider.log(`Error clearing index data: ${error instanceof Error ? error.message : String(error)}`) provider.postMessageToWebview({ diff --git a/src/services/code-index/embedders/__tests__/gemini.spec.ts b/src/services/code-index/embedders/__tests__/gemini.spec.ts index 378e6e7d95..7a35a40d1c 100644 --- a/src/services/code-index/embedders/__tests__/gemini.spec.ts +++ b/src/services/code-index/embedders/__tests__/gemini.spec.ts @@ -114,4 +114,40 @@ describe("GeminiEmbedder", () => { await expect(embedder.validateConfiguration()).rejects.toThrow("Validation failed") }) }) + + describe("createEmbeddings", () => { + let mockCreateEmbeddings: any + + beforeEach(() => { + mockCreateEmbeddings = vitest.fn() + MockedOpenAICompatibleEmbedder.prototype.createEmbeddings = mockCreateEmbeddings + embedder = new GeminiEmbedder("test-api-key") + }) + + it("should use default model when none is provided", async () => { + // Arrange + const texts = ["text1", "text2"] + mockCreateEmbeddings.mockResolvedValue({ embeddings: [], usage: { promptTokens: 0, totalTokens: 0 } }) + + // Act + await embedder.createEmbeddings(texts) + + // Assert + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "text-embedding-004", undefined) + }) + + it("should pass model and dimension to the OpenAICompatibleEmbedder", async () => { + // Arrange + const texts = ["text1", "text2"] + const model = "custom-model" + const options = { dimension: 1536 } + mockCreateEmbeddings.mockResolvedValue({ embeddings: [], usage: { promptTokens: 0, totalTokens: 0 } }) + + // Act + await embedder.createEmbeddings(texts, model, options) + + // Assert + 
expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, model, options) + }) + }) }) diff --git a/src/services/code-index/embedders/gemini.ts b/src/services/code-index/embedders/gemini.ts index fcca4c0fda..b1581c852f 100644 --- a/src/services/code-index/embedders/gemini.ts +++ b/src/services/code-index/embedders/gemini.ts @@ -44,10 +44,15 @@ export class GeminiEmbedder implements IEmbedder { * @param model Optional model identifier (ignored - always uses text-embedding-004) * @returns Promise resolving to embedding response */ - async createEmbeddings(texts: string[], model?: string): Promise { + async createEmbeddings( + texts: string[], + model?: string, + options?: { dimension?: number }, + ): Promise { try { - // Always use the fixed Gemini model, ignoring any passed model parameter - return await this.openAICompatibleEmbedder.createEmbeddings(texts, GeminiEmbedder.GEMINI_MODEL) + // Use the provided model or the fixed Gemini model + const modelToUse = model || GeminiEmbedder.GEMINI_MODEL + return await this.openAICompatibleEmbedder.createEmbeddings(texts, modelToUse, options) } catch (error) { TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { error: error instanceof Error ? 
error.message : String(error), diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts index d882e78313..205675e9a4 100644 --- a/src/services/code-index/embedders/openai-compatible.ts +++ b/src/services/code-index/embedders/openai-compatible.ts @@ -71,7 +71,11 @@ export class OpenAICompatibleEmbedder implements IEmbedder { * @param model Optional model identifier * @returns Promise resolving to embedding response */ - async createEmbeddings(texts: string[], model?: string): Promise { + async createEmbeddings( + texts: string[], + model?: string, + options?: { dimension?: number }, + ): Promise { const modelToUse = model || this.defaultModelId // Apply model-specific query prefix if required @@ -139,7 +143,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { } if (currentBatch.length > 0) { - const batchResult = await this._embedBatchWithRetries(currentBatch, modelToUse) + const batchResult = await this._embedBatchWithRetries(currentBatch, modelToUse, options) allEmbeddings.push(...batchResult.embeddings) usage.promptTokens += batchResult.usage.promptTokens usage.totalTokens += batchResult.usage.totalTokens @@ -181,7 +185,18 @@ export class OpenAICompatibleEmbedder implements IEmbedder { url: string, batchTexts: string[], model: string, + options?: { dimension?: number }, ): Promise { + const body: Record = { + input: batchTexts, + model: model, + encoding_format: "base64", + } + + if (options?.dimension) { + body.dimensions = options.dimension + } + const response = await fetch(url, { method: "POST", headers: { @@ -191,11 +206,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { "api-key": this.apiKey, Authorization: `Bearer ${this.apiKey}`, }, - body: JSON.stringify({ - input: batchTexts, - model: model, - encoding_format: "base64", - }), + body: JSON.stringify(body), }) if (!response || !response.ok) { @@ -234,6 +245,7 @@ export class OpenAICompatibleEmbedder implements 
IEmbedder { private async _embedBatchWithRetries( batchTexts: string[], model: string, + options?: { dimension?: number }, ): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> { // Use cached value for performance const isFullUrl = this.isFullUrl @@ -244,7 +256,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { if (isFullUrl) { // Use direct HTTP request for full endpoint URLs - response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model) + response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model, options) } else { // Use OpenAI SDK for base URLs response = (await this.embeddingsClient.embeddings.create({ @@ -254,6 +266,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { // when processing numeric arrays, which breaks compatibility with models using larger dimensions. // By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves. encoding_format: "base64", + ...(options?.dimension && { dimensions: options.dimension }), })) as OpenAIEmbeddingResponse } diff --git a/src/services/code-index/interfaces/embedder.ts b/src/services/code-index/interfaces/embedder.ts index 0a74446d5e..bb7c5283bb 100644 --- a/src/services/code-index/interfaces/embedder.ts +++ b/src/services/code-index/interfaces/embedder.ts @@ -9,7 +9,7 @@ export interface IEmbedder { * @param model Optional model ID to use for embeddings * @returns Promise resolving to an EmbeddingResponse */ - createEmbeddings(texts: string[], model?: string): Promise + createEmbeddings(texts: string[], model?: string, options?: { dimension?: number }): Promise /** * Validates the embedder configuration by testing connectivity and credentials. 
diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index 4c6bc24319..e70ba23f53 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -48,6 +48,9 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { }, gemini: { "text-embedding-004": { dimension: 768 }, + // ADD: New model with a default dimension. + // The actual dimension will be passed from the configuration at runtime. + "gemini-embedding-exp-03-07": { dimension: 768 }, }, } From 761b2ea8423a13708e927e145507c67751ab1781 Mon Sep 17 00:00:00 2001 From: Jopo-JP <21254390+Jopo-JP@users.noreply.github.com> Date: Tue, 15 Jul 2025 12:39:37 +0200 Subject: [PATCH 02/10] feat: Enhance Gemini embedder with configurable dimensions and validation for embedding models --- packages/types/src/codebase-index.ts | 21 +++- src/shared/embeddingModels.ts | 4 + .../src/components/chat/CodeIndexPopover.tsx | 109 ++++++++++++++++-- 3 files changed, 121 insertions(+), 13 deletions(-) diff --git a/packages/types/src/codebase-index.ts b/packages/types/src/codebase-index.ts index 0ad19d8676..d9922a53ed 100644 --- a/packages/types/src/codebase-index.ts +++ b/packages/types/src/codebase-index.ts @@ -42,11 +42,24 @@ export type CodebaseIndexConfig = z.infer * CodebaseIndexModels */ +const modelProfileSchema = z.object({ + /** The fixed dimension for the model, or a fallback for models with variable dimensions. */ + dimension: z.number(), + scoreThreshold: z.number().optional(), + queryPrefix: z.string().optional(), + /** The minimum dimension supported by a variable-dimension model. */ + minDimension: z.number().optional(), + /** The maximum dimension supported by a variable-dimension model. */ + maxDimension: z.number().optional(), + /** The default dimension for a variable-dimension model, used for UI presentation. 
*/ + defaultDimension: z.number().optional(), +}) + export const codebaseIndexModelsSchema = z.object({ - openai: z.record(z.string(), z.object({ dimension: z.number() })).optional(), - ollama: z.record(z.string(), z.object({ dimension: z.number() })).optional(), - "openai-compatible": z.record(z.string(), z.object({ dimension: z.number() })).optional(), - gemini: z.record(z.string(), z.object({ dimension: z.number() })).optional(), + openai: z.record(z.string(), modelProfileSchema).optional(), + ollama: z.record(z.string(), modelProfileSchema).optional(), + "openai-compatible": z.record(z.string(), modelProfileSchema).optional(), + gemini: z.record(z.string(), modelProfileSchema).optional(), }) export type CodebaseIndexModels = z.infer diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index f387480c65..0a6890bf47 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -5,9 +5,13 @@ export type EmbedderProvider = "openai" | "ollama" | "openai-compatible" | "gemini" // Add other providers as needed export interface EmbeddingModelProfile { + /** The fixed dimension for the model, or a fallback for models with variable dimensions. */ dimension: number scoreThreshold?: number // Model-specific minimum score threshold for semantic search queryPrefix?: string // Optional prefix required by the model for queries + minDimension?: number // The minimum dimension supported by a variable-dimension model. + maxDimension?: number // The maximum dimension supported by a variable-dimension model. + defaultDimension?: number // The default dimension for a variable-dimension model, used for UI presentation. 
// Add other model-specific properties if needed, e.g., context window size } diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index b5742cc623..be379ffb69 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -71,7 +71,7 @@ interface LocalCodeIndexSettings { } // Validation schema for codebase index settings -const createValidationSchema = (provider: EmbedderProvider, t: any) => { +const createValidationSchema = (provider: EmbedderProvider, t: any, models: any) => { const baseSchema = z.object({ codebaseIndexEnabled: z.boolean(), codebaseIndexQdrantUrl: z @@ -115,12 +115,32 @@ const createValidationSchema = (provider: EmbedderProvider, t: any) => { }) case "gemini": - return baseSchema.extend({ - codebaseIndexGeminiApiKey: z.string().min(1, t("settings:codeIndex.validation.geminiApiKeyRequired")), - codebaseIndexEmbedderModelId: z - .string() - .min(1, t("settings:codeIndex.validation.modelSelectionRequired")), - }) + return baseSchema + .extend({ + codebaseIndexGeminiApiKey: z + .string() + .min(1, t("settings:codeIndex.validation.geminiApiKeyRequired")), + codebaseIndexEmbedderModelId: z + .string() + .min(1, t("settings:codeIndex.validation.modelSelectionRequired")), + codebaseIndexEmbedderModelDimension: z.number().optional(), + }) + .refine( + (data) => { + const model = models?.gemini?.[data.codebaseIndexEmbedderModelId || ""] + if (model?.minDimension && model?.maxDimension && data.codebaseIndexEmbedderModelDimension) { + return ( + data.codebaseIndexEmbedderModelDimension >= model.minDimension && + data.codebaseIndexEmbedderModelDimension <= model.maxDimension + ) + } + return true + }, + { + message: t("settings:codeIndex.validation.invalidDimension"), + path: ["codebaseIndexEmbedderModelDimension"], + }, + ) default: return baseSchema @@ -188,7 +208,13 @@ export const CodeIndexPopover: React.FC = ({ 
codebaseIndexEmbedderBaseUrl: codebaseIndexConfig.codebaseIndexEmbedderBaseUrl || "", codebaseIndexEmbedderModelId: codebaseIndexConfig.codebaseIndexEmbedderModelId || "", codebaseIndexEmbedderModelDimension: - codebaseIndexConfig.codebaseIndexEmbedderModelDimension || undefined, + // This order is critical to prevent a UI race condition. After saving, + // the component's local `currentSettings` is updated immediately, while + // the global `codebaseIndexConfig` might still be stale. Prioritizing + // `currentSettings` ensures the UI reflects the saved value instantly. + currentSettings.codebaseIndexEmbedderModelDimension || + codebaseIndexConfig.codebaseIndexEmbedderModelDimension || + undefined, codebaseIndexSearchMaxResults: codebaseIndexConfig.codebaseIndexSearchMaxResults ?? CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_RESULTS, codebaseIndexSearchMinScore: @@ -349,7 +375,7 @@ export const CodeIndexPopover: React.FC = ({ // Validation function const validateSettings = (): boolean => { - const schema = createValidationSchema(currentSettings.codebaseIndexEmbedderProvider, t) + const schema = createValidationSchema(currentSettings.codebaseIndexEmbedderProvider, t, codebaseIndexModels) // Prepare data for validation const dataToValidate: any = {} @@ -916,6 +942,71 @@ export const CodeIndexPopover: React.FC = ({

)} + {(() => { + const selectedModelProfile = + codebaseIndexModels?.gemini?.[ + currentSettings.codebaseIndexEmbedderModelId + ] + + // Conditionally render the dimension slider only for Gemini models + // that explicitly define a min and max dimension in their profile. + if ( + selectedModelProfile?.minDimension && + selectedModelProfile?.maxDimension + ) { + return ( +
+
+ +
+
+ + updateSetting( + "codebaseIndexEmbedderModelDimension", + values[0], + ) + } + className="flex-1" + data-testid="model-dimension-slider" + /> + + {currentSettings.codebaseIndexEmbedderModelDimension ?? + selectedModelProfile.defaultDimension ?? + selectedModelProfile.minDimension} + + + updateSetting( + "codebaseIndexEmbedderModelDimension", + selectedModelProfile.defaultDimension, + ) + }> + + +
+ {formErrors.codebaseIndexEmbedderModelDimension && ( +

+ {formErrors.codebaseIndexEmbedderModelDimension} +

+ )} +
+ ) + } + return null + })()} )} From 2dfb2c44feb8b23989c711f3d12e0c38c515841c Mon Sep 17 00:00:00 2001 From: Jopo-JP <21254390+Jopo-JP@users.noreply.github.com> Date: Tue, 15 Jul 2025 13:49:32 +0200 Subject: [PATCH 03/10] feat: Enhance Gemini embedder with configurable dimensions and validation for embedding models --- .../embedders/__tests__/gemini.spec.ts | 36 +++++++++++++++++++ src/services/code-index/embedders/gemini.ts | 8 +++-- .../code-index/embedders/openai-compatible.ts | 29 ++++++++++----- .../code-index/interfaces/embedder.ts | 2 +- src/shared/embeddingModels.ts | 8 ++++- 5 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/services/code-index/embedders/__tests__/gemini.spec.ts b/src/services/code-index/embedders/__tests__/gemini.spec.ts index d41a4dc1e9..b749fa1a04 100644 --- a/src/services/code-index/embedders/__tests__/gemini.spec.ts +++ b/src/services/code-index/embedders/__tests__/gemini.spec.ts @@ -190,4 +190,40 @@ describe("GeminiEmbedder", () => { await expect(embedder.validateConfiguration()).rejects.toThrow("Validation failed") }) }) + + describe("createEmbeddings", () => { + let mockCreateEmbeddings: any + + beforeEach(() => { + mockCreateEmbeddings = vitest.fn() + MockedOpenAICompatibleEmbedder.prototype.createEmbeddings = mockCreateEmbeddings + embedder = new GeminiEmbedder("test-api-key") + }) + + it("should use default model when none is provided", async () => { + // Arrange + const texts = ["text1", "text2"] + mockCreateEmbeddings.mockResolvedValue({ embeddings: [], usage: { promptTokens: 0, totalTokens: 0 } }) + + // Act + await embedder.createEmbeddings(texts) + + // Assert + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "text-embedding-004", undefined) + }) + + it("should pass model and dimension to the OpenAICompatibleEmbedder", async () => { + // Arrange + const texts = ["text1", "text2"] + const model = "custom-model" + const options = { dimension: 1536 } + mockCreateEmbeddings.mockResolvedValue({ 
embeddings: [], usage: { promptTokens: 0, totalTokens: 0 } }) + + // Act + await embedder.createEmbeddings(texts, model, options) + + // Assert + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, model, options) + }) + }) }) diff --git a/src/services/code-index/embedders/gemini.ts b/src/services/code-index/embedders/gemini.ts index 7e795875c9..df0aa50133 100644 --- a/src/services/code-index/embedders/gemini.ts +++ b/src/services/code-index/embedders/gemini.ts @@ -47,11 +47,15 @@ export class GeminiEmbedder implements IEmbedder { * @param model Optional model identifier (uses constructor model if not provided) * @returns Promise resolving to embedding response */ - async createEmbeddings(texts: string[], model?: string): Promise { + async createEmbeddings( + texts: string[], + model?: string, + options?: { dimension?: number }, + ): Promise { try { // Use the provided model or fall back to the instance's model const modelToUse = model || this.modelId - return await this.openAICompatibleEmbedder.createEmbeddings(texts, modelToUse) + return await this.openAICompatibleEmbedder.createEmbeddings(texts, modelToUse, options) } catch (error) { TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { error: error instanceof Error ? 
error.message : String(error), diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts index d882e78313..205675e9a4 100644 --- a/src/services/code-index/embedders/openai-compatible.ts +++ b/src/services/code-index/embedders/openai-compatible.ts @@ -71,7 +71,11 @@ export class OpenAICompatibleEmbedder implements IEmbedder { * @param model Optional model identifier * @returns Promise resolving to embedding response */ - async createEmbeddings(texts: string[], model?: string): Promise { + async createEmbeddings( + texts: string[], + model?: string, + options?: { dimension?: number }, + ): Promise { const modelToUse = model || this.defaultModelId // Apply model-specific query prefix if required @@ -139,7 +143,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { } if (currentBatch.length > 0) { - const batchResult = await this._embedBatchWithRetries(currentBatch, modelToUse) + const batchResult = await this._embedBatchWithRetries(currentBatch, modelToUse, options) allEmbeddings.push(...batchResult.embeddings) usage.promptTokens += batchResult.usage.promptTokens usage.totalTokens += batchResult.usage.totalTokens @@ -181,7 +185,18 @@ export class OpenAICompatibleEmbedder implements IEmbedder { url: string, batchTexts: string[], model: string, + options?: { dimension?: number }, ): Promise { + const body: Record = { + input: batchTexts, + model: model, + encoding_format: "base64", + } + + if (options?.dimension) { + body.dimensions = options.dimension + } + const response = await fetch(url, { method: "POST", headers: { @@ -191,11 +206,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { "api-key": this.apiKey, Authorization: `Bearer ${this.apiKey}`, }, - body: JSON.stringify({ - input: batchTexts, - model: model, - encoding_format: "base64", - }), + body: JSON.stringify(body), }) if (!response || !response.ok) { @@ -234,6 +245,7 @@ export class OpenAICompatibleEmbedder implements 
IEmbedder { private async _embedBatchWithRetries( batchTexts: string[], model: string, + options?: { dimension?: number }, ): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> { // Use cached value for performance const isFullUrl = this.isFullUrl @@ -244,7 +256,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { if (isFullUrl) { // Use direct HTTP request for full endpoint URLs - response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model) + response = await this.makeDirectEmbeddingRequest(this.baseUrl, batchTexts, model, options) } else { // Use OpenAI SDK for base URLs response = (await this.embeddingsClient.embeddings.create({ @@ -254,6 +266,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { // when processing numeric arrays, which breaks compatibility with models using larger dimensions. // By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves. encoding_format: "base64", + ...(options?.dimension && { dimensions: options.dimension }), })) as OpenAIEmbeddingResponse } diff --git a/src/services/code-index/interfaces/embedder.ts b/src/services/code-index/interfaces/embedder.ts index 0a74446d5e..bb7c5283bb 100644 --- a/src/services/code-index/interfaces/embedder.ts +++ b/src/services/code-index/interfaces/embedder.ts @@ -9,7 +9,7 @@ export interface IEmbedder { * @param model Optional model ID to use for embeddings * @returns Promise resolving to an EmbeddingResponse */ - createEmbeddings(texts: string[], model?: string): Promise + createEmbeddings(texts: string[], model?: string, options?: { dimension?: number }): Promise /** * Validates the embedder configuration by testing connectivity and credentials. 
diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index 0a6890bf47..eb7fd92d05 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -52,7 +52,13 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { }, gemini: { "text-embedding-004": { dimension: 768 }, - "gemini-embedding-001": { dimension: 3072, scoreThreshold: 0.4 }, + "gemini-embedding-001": { + dimension: 3072, // Fallback, but defaultDimension is preferred + minDimension: 128, + maxDimension: 3072, + defaultDimension: 3072, + scoreThreshold: 0.4, + }, }, } From 643bc406e4785ee7a8d47b1451ca03dbf530eaa4 Mon Sep 17 00:00:00 2001 From: Jopo-JP <21254390+Jopo-JP@users.noreply.github.com> Date: Tue, 15 Jul 2025 15:07:51 +0200 Subject: [PATCH 04/10] fix: Update createEmbeddings tests to include undefined dimension for consistency --- src/services/code-index/embedders/__tests__/gemini.spec.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/services/code-index/embedders/__tests__/gemini.spec.ts b/src/services/code-index/embedders/__tests__/gemini.spec.ts index b749fa1a04..73d574b30c 100644 --- a/src/services/code-index/embedders/__tests__/gemini.spec.ts +++ b/src/services/code-index/embedders/__tests__/gemini.spec.ts @@ -104,7 +104,7 @@ describe("GeminiEmbedder", () => { const result = await embedder.createEmbeddings(texts) // Assert - expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "gemini-embedding-001") + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "gemini-embedding-001", undefined) expect(result).toEqual(mockResponse) }) @@ -124,7 +124,7 @@ describe("GeminiEmbedder", () => { const result = await embedder.createEmbeddings(texts, "gemini-embedding-001") // Assert - expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "gemini-embedding-001") + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "gemini-embedding-001", undefined) expect(result).toEqual(mockResponse) }) @@ -209,7 +209,7 @@ 
describe("GeminiEmbedder", () => { await embedder.createEmbeddings(texts) // Assert - expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "text-embedding-004", undefined) + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "gemini-embedding-001", undefined) }) it("should pass model and dimension to the OpenAICompatibleEmbedder", async () => { From 7a23d0364540f54f0c0d0825c9dce8c341d49e02 Mon Sep 17 00:00:00 2001 From: Jopo-JP <21254390+Jopo-JP@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:57:25 +0200 Subject: [PATCH 05/10] fix(ui): resolve state synchronization bug in settings popover --- .../src/components/chat/CodeIndexPopover.tsx | 59 +++++++++++++------ 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index 93407f1846..674b3f5d96 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -198,40 +198,44 @@ export const CodeIndexPopover: React.FC = ({ setIndexingStatus(externalIndexingStatus) }, [externalIndexingStatus]) - // Initialize settings from global state + // Initializes the settings from the global state when it changes useEffect(() => { if (codebaseIndexConfig) { + const provider = codebaseIndexConfig.codebaseIndexEmbedderProvider || "openai" + const modelId = codebaseIndexConfig.codebaseIndexEmbedderModelId || "" + const modelProfile = codebaseIndexModels?.[provider]?.[modelId] + const settings = { codebaseIndexEnabled: codebaseIndexConfig.codebaseIndexEnabled ?? 
true, codebaseIndexQdrantUrl: codebaseIndexConfig.codebaseIndexQdrantUrl || "", - codebaseIndexEmbedderProvider: codebaseIndexConfig.codebaseIndexEmbedderProvider || "openai", + codebaseIndexEmbedderProvider: provider, codebaseIndexEmbedderBaseUrl: codebaseIndexConfig.codebaseIndexEmbedderBaseUrl || "", - codebaseIndexEmbedderModelId: codebaseIndexConfig.codebaseIndexEmbedderModelId || "", + codebaseIndexEmbedderModelId: modelId, + // Determines the dimension exclusively from the global configuration and model profiles. + // The local 'currentSettings' state is no longer read here. codebaseIndexEmbedderModelDimension: - // This order is critical to prevent a UI race condition. After saving, - // the component's local `currentSettings` is updated immediately, while - // the global `codebaseIndexConfig` might still be stale. Prioritizing - // `currentSettings` ensures the UI reflects the saved value instantly. - currentSettings.codebaseIndexEmbedderModelDimension || - codebaseIndexConfig.codebaseIndexEmbedderModelDimension || - undefined, + codebaseIndexConfig.codebaseIndexEmbedderModelDimension || modelProfile?.defaultDimension, codebaseIndexSearchMaxResults: - codebaseIndexConfig.codebaseIndexSearchMaxResults ?? CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_RESULTS, + codebaseIndexConfig.codebaseIndexSearchMaxResults ?? + CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_RESULTS, codebaseIndexSearchMinScore: - codebaseIndexConfig.codebaseIndexSearchMinScore ?? CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_MIN_SCORE, + codebaseIndexConfig.codebaseIndexSearchMinScore ?? + CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_MIN_SCORE, + // Keys are initially set to empty and populated by a separate effect. 
codeIndexOpenAiKey: "", codeIndexQdrantApiKey: "", - codebaseIndexOpenAiCompatibleBaseUrl: codebaseIndexConfig.codebaseIndexOpenAiCompatibleBaseUrl || "", + codebaseIndexOpenAiCompatibleBaseUrl: + codebaseIndexConfig.codebaseIndexOpenAiCompatibleBaseUrl || "", codebaseIndexOpenAiCompatibleApiKey: "", codebaseIndexGeminiApiKey: "", } setInitialSettings(settings) setCurrentSettings(settings) - // Request secret status to check if secrets exist + // Requests the status of the secrets to display placeholders correctly. vscode.postMessage({ type: "requestCodeIndexSecretStatus" }) } - }, [codebaseIndexConfig, currentSettings.codebaseIndexEmbedderModelDimension]) + }, [codebaseIndexConfig, codebaseIndexModels]) // Dependencies are now correct and complete. // Request initial indexing status useEffect(() => { @@ -373,6 +377,27 @@ export const CodeIndexPopover: React.FC = ({ } } + // Handles model changes, ensuring dimension is reset correctly + const handleModelChange = (newModelId: string) => { + const provider = currentSettings.codebaseIndexEmbedderProvider + const modelProfile = codebaseIndexModels?.[provider]?.[newModelId] + const defaultDimension = modelProfile?.defaultDimension + + setCurrentSettings((prev) => ({ + ...prev, + codebaseIndexEmbedderModelId: newModelId, + codebaseIndexEmbedderModelDimension: defaultDimension, + })) + + // Clear validation errors for model and dimension + setFormErrors((prev) => { + const newErrors = { ...prev } + delete newErrors.codebaseIndexEmbedderModelId + delete newErrors.codebaseIndexEmbedderModelDimension + return newErrors + }) + } + // Validation function const validateSettings = (): boolean => { const schema = createValidationSchema(currentSettings.codebaseIndexEmbedderProvider, t, codebaseIndexModels) @@ -910,9 +935,7 @@ export const CodeIndexPopover: React.FC = ({ - updateSetting("codebaseIndexEmbedderModelId", e.target.value) - } + onChange={(e: any) => handleModelChange(e.target.value)} className={cn("w-full", { 
"border-red-500": formErrors.codebaseIndexEmbedderModelId, })}> From 530c71baf1fe6c5ecedad4aedf2e1e0afb446231 Mon Sep 17 00:00:00 2001 From: Jopo-JP <21254390+Jopo-JP@users.noreply.github.com> Date: Tue, 15 Jul 2025 19:37:04 +0200 Subject: [PATCH 06/10] feat(ui): Add configurable dimension for Gemini models Replaces the dimension slider with a text input for better precision, addressing reviewer feedback. Adds validation for the dimension range and corrects all related UI text and translations. --- .../src/components/chat/CodeIndexPopover.tsx | 101 +++++++++--------- webview-ui/src/i18n/locales/en/settings.json | 2 + 2 files changed, 51 insertions(+), 52 deletions(-) diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index 674b3f5d96..ad139dd9c4 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -125,6 +125,20 @@ const createValidationSchema = (provider: EmbedderProvider, t: any, models: any) .min(1, t("settings:codeIndex.validation.modelSelectionRequired")), codebaseIndexEmbedderModelDimension: z.number().optional(), }) + .refine( + (data) => { + const model = models?.gemini?.[data.codebaseIndexEmbedderModelId || ""] + // If the model supports variable dimensions, a dimension must be provided. 
+ if (model?.minDimension && !data.codebaseIndexEmbedderModelDimension) { + return false // Fails validation if dimension is required but not provided + } + return true + }, + { + message: t("settings:codeIndex.validation.modelDimensionRequired"), + path: ["codebaseIndexEmbedderModelDimension"], + }, + ) .refine( (data) => { const model = models?.gemini?.[data.codebaseIndexEmbedderModelId || ""] @@ -136,9 +150,15 @@ const createValidationSchema = (provider: EmbedderProvider, t: any, models: any) } return true }, - { - message: t("settings:codeIndex.validation.invalidDimension"), - path: ["codebaseIndexEmbedderModelDimension"], + (data) => { + const model = models?.gemini?.[data.codebaseIndexEmbedderModelId || ""] + return { + message: t("settings:codeIndex.validation.invalidDimension", { + min: model?.minDimension, + max: model?.maxDimension, + }), + path: ["codebaseIndexEmbedderModelDimension"], + } }, ) @@ -216,16 +236,13 @@ export const CodeIndexPopover: React.FC = ({ codebaseIndexEmbedderModelDimension: codebaseIndexConfig.codebaseIndexEmbedderModelDimension || modelProfile?.defaultDimension, codebaseIndexSearchMaxResults: - codebaseIndexConfig.codebaseIndexSearchMaxResults ?? - CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_RESULTS, + codebaseIndexConfig.codebaseIndexSearchMaxResults ?? CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_RESULTS, codebaseIndexSearchMinScore: - codebaseIndexConfig.codebaseIndexSearchMinScore ?? - CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_MIN_SCORE, + codebaseIndexConfig.codebaseIndexSearchMinScore ?? CODEBASE_INDEX_DEFAULTS.DEFAULT_SEARCH_MIN_SCORE, // Keys are initially set to empty and populated by a separate effect. 
codeIndexOpenAiKey: "", codeIndexQdrantApiKey: "", - codebaseIndexOpenAiCompatibleBaseUrl: - codebaseIndexConfig.codebaseIndexOpenAiCompatibleBaseUrl || "", + codebaseIndexOpenAiCompatibleBaseUrl: codebaseIndexConfig.codebaseIndexOpenAiCompatibleBaseUrl || "", codebaseIndexOpenAiCompatibleApiKey: "", codebaseIndexGeminiApiKey: "", } @@ -971,55 +988,35 @@ export const CodeIndexPopover: React.FC = ({ currentSettings.codebaseIndexEmbedderModelId ] - // Conditionally render the dimension slider only for Gemini models - // that explicitly define a min and max dimension in their profile. if ( selectedModelProfile?.minDimension && selectedModelProfile?.maxDimension ) { return (
-
- -
-
- - updateSetting( - "codebaseIndexEmbedderModelDimension", - values[0], - ) - } - className="flex-1" - data-testid="model-dimension-slider" - /> - - {currentSettings.codebaseIndexEmbedderModelDimension ?? - selectedModelProfile.defaultDimension ?? - selectedModelProfile.minDimension} - - - updateSetting( - "codebaseIndexEmbedderModelDimension", - selectedModelProfile.defaultDimension, - ) - }> - - -
+ + + updateSetting( + "codebaseIndexEmbedderModelDimension", + e.target.value + ? parseInt(e.target.value, 10) + : undefined, + ) + } + className="w-full" + /> {formErrors.codebaseIndexEmbedderModelDimension && (

{formErrors.codebaseIndexEmbedderModelDimension} diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 25428cfb16..1f9b3e4947 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -95,6 +95,7 @@ "qdrantUrlPlaceholder": "http://localhost:6333", "saveError": "Failed to save settings", "modelDimensions": "({{dimension}} dimensions)", + "dimensionRange": "({{min}}-{{max}})", "saveSuccess": "Settings saved successfully", "saving": "Saving...", "saveSettings": "Save", @@ -115,6 +116,7 @@ "apiKeyRequired": "API key is required", "modelIdRequired": "Model ID is required", "modelDimensionRequired": "Model dimension is required", + "invalidDimension": "Dimension must be between {{min}} and {{max}}", "geminiApiKeyRequired": "Gemini API key is required", "ollamaBaseUrlRequired": "Ollama base URL is required", "baseUrlRequired": "Base URL is required", From 7110ceaf540104958a5d907351cf60c26f98c7e5 Mon Sep 17 00:00:00 2001 From: Nikolas Thiel <21254390+Jopo-JP@users.noreply.github.com> Date: Fri, 25 Jul 2025 15:07:11 +0200 Subject: [PATCH 07/10] Update code-qa.yml --- .github/workflows/code-qa.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/code-qa.yml b/.github/workflows/code-qa.yml index ba85a01b21..88c294f4c3 100644 --- a/.github/workflows/code-qa.yml +++ b/.github/workflows/code-qa.yml @@ -43,14 +43,7 @@ jobs: unit-test: name: platform-unit-test (${{ matrix.name }}) - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: ubuntu-latest - name: ubuntu-latest - - os: windows-latest - name: windows-latest + runs-on: self-hosted steps: - name: Checkout code uses: actions/checkout@v4 From 4abcecdd2292562b35befcb4b1f3ff41f2ac7511 Mon Sep 17 00:00:00 2001 From: Nikolas Thiel <21254390+Jopo-JP@users.noreply.github.com> Date: Fri, 25 Jul 2025 15:08:32 +0200 Subject: [PATCH 08/10] Update code-qa.yml --- 
.github/workflows/code-qa.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-qa.yml b/.github/workflows/code-qa.yml index 88c294f4c3..6a59b98133 100644 --- a/.github/workflows/code-qa.yml +++ b/.github/workflows/code-qa.yml @@ -30,7 +30,7 @@ jobs: run: pnpm knip compile: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - name: Checkout code uses: actions/checkout@v4 From de043a57b7ba402432a7aea8d6cf23b24e2dad07 Mon Sep 17 00:00:00 2001 From: Nikolas Thiel <21254390+Jopo-JP@users.noreply.github.com> Date: Mon, 28 Jul 2025 18:07:15 +0200 Subject: [PATCH 09/10] revert: unwanted changes to workflow I removed it as I run GitHub runners locally --- .github/workflows/code-qa.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-qa.yml b/.github/workflows/code-qa.yml index 6a59b98133..ba85a01b21 100644 --- a/.github/workflows/code-qa.yml +++ b/.github/workflows/code-qa.yml @@ -30,7 +30,7 @@ jobs: run: pnpm knip compile: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 @@ -43,7 +43,14 @@ jobs: unit-test: name: platform-unit-test (${{ matrix.name }}) - runs-on: self-hosted + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-latest + name: ubuntu-latest + - os: windows-latest + name: windows-latest steps: - name: Checkout code uses: actions/checkout@v4 From 4b52f4156928b690a612d23d38b313ed2ea48c2d Mon Sep 17 00:00:00 2001 From: Jopo-JP <21254390+Jopo-JP@users.noreply.github.com> Date: Wed, 30 Jul 2025 02:59:26 +0200 Subject: [PATCH 10/10] fix: resolve merge conflict in codebase-index.ts --- packages/types/src/codebase-index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/types/src/codebase-index.ts b/packages/types/src/codebase-index.ts index d9922a53ed..cfd83611fb 100644 --- a/packages/types/src/codebase-index.ts +++ b/packages/types/src/codebase-index.ts @@ -21,7
+21,7 @@ export const CODEBASE_INDEX_DEFAULTS = { export const codebaseIndexConfigSchema = z.object({ codebaseIndexEnabled: z.boolean().optional(), codebaseIndexQdrantUrl: z.string().optional(), - codebaseIndexEmbedderProvider: z.enum(["openai", "ollama", "openai-compatible", "gemini"]).optional(), + codebaseIndexEmbedderProvider: z.enum(["openai", "ollama", "openai-compatible", "gemini", "mistral"]).optional(), codebaseIndexEmbedderBaseUrl: z.string().optional(), codebaseIndexEmbedderModelId: z.string().optional(), codebaseIndexEmbedderModelDimension: z.number().optional(), @@ -60,6 +60,7 @@ export const codebaseIndexModelsSchema = z.object({ ollama: z.record(z.string(), modelProfileSchema).optional(), "openai-compatible": z.record(z.string(), modelProfileSchema).optional(), gemini: z.record(z.string(), modelProfileSchema).optional(), + mistral: z.record(z.string(), modelProfileSchema).optional(), }) export type CodebaseIndexModels = z.infer @@ -75,6 +76,7 @@ export const codebaseIndexProviderSchema = z.object({ codebaseIndexOpenAiCompatibleApiKey: z.string().optional(), codebaseIndexOpenAiCompatibleModelDimension: z.number().optional(), codebaseIndexGeminiApiKey: z.string().optional(), + codebaseIndexMistralApiKey: z.string().optional(), }) export type CodebaseIndexProvider = z.infer