From fe877f9f3a645071d324ab91d46e58042bb57880 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 14 Jul 2025 12:53:50 -0500 Subject: [PATCH 1/2] feat: add gemini-embedding-001 model to code-index service - Add gemini-embedding-001 with 3072 dimensions to EMBEDDING_MODEL_PROFILES - Update GeminiEmbedder to support multiple models via constructor parameter - Set gemini-embedding-001 as the default Gemini model - Update service-factory to pass modelId to GeminiEmbedder - Remove hardcoded dimension logic for Gemini provider - Update tests to support new model parameter and correct dimensions The new model provides state-of-the-art performance across English, multilingual and code tasks, unifying previously specialized models. --- .../__tests__/service-factory.spec.ts | 57 +++++++++++-- .../embedders/__tests__/gemini.spec.ts | 82 ++++++++++++++++++- src/services/code-index/embedders/gemini.ts | 37 ++++----- src/services/code-index/service-factory.ts | 5 +- src/shared/embeddingModels.ts | 3 +- 5 files changed, 149 insertions(+), 35 deletions(-) diff --git a/src/services/code-index/__tests__/service-factory.spec.ts b/src/services/code-index/__tests__/service-factory.spec.ts index d65d99f6231..373b0e3e827 100644 --- a/src/services/code-index/__tests__/service-factory.spec.ts +++ b/src/services/code-index/__tests__/service-factory.spec.ts @@ -265,7 +265,7 @@ describe("CodeIndexServiceFactory", () => { expect(() => factory.createEmbedder()).toThrow("serviceFactory.openAiCompatibleConfigMissing") }) - it("should create GeminiEmbedder when using Gemini provider", () => { + it("should create GeminiEmbedder with default model when no modelId specified", () => { // Arrange const testConfig = { embedderProvider: "gemini", @@ -279,7 +279,25 @@ describe("CodeIndexServiceFactory", () => { factory.createEmbedder() // Assert - expect(MockedGeminiEmbedder).toHaveBeenCalledWith("test-gemini-api-key") + expect(MockedGeminiEmbedder).toHaveBeenCalledWith("test-gemini-api-key", 
undefined) + }) + + it("should create GeminiEmbedder with specified modelId", () => { + // Arrange + const testConfig = { + embedderProvider: "gemini", + modelId: "text-embedding-004", + geminiOptions: { + apiKey: "test-gemini-api-key", + }, + } + mockConfigManager.getConfig.mockReturnValue(testConfig as any) + + // Act + factory.createEmbedder() + + // Assert + expect(MockedGeminiEmbedder).toHaveBeenCalledWith("test-gemini-api-key", "text-embedding-004") }) it("should throw error when Gemini API key is missing", () => { @@ -507,26 +525,51 @@ describe("CodeIndexServiceFactory", () => { ) }) - it("should use fixed dimension 768 for Gemini provider", () => { + it("should use model-specific dimension for Gemini provider", () => { // Arrange const testConfig = { embedderProvider: "gemini", - modelId: "text-embedding-004", // This is ignored by Gemini + modelId: "gemini-embedding-001", qdrantUrl: "http://localhost:6333", qdrantApiKey: "test-key", } mockConfigManager.getConfig.mockReturnValue(testConfig as any) + mockGetModelDimension.mockReturnValue(3072) // Act factory.createVectorStore() // Assert - // getModelDimension should not be called for Gemini - expect(mockGetModelDimension).not.toHaveBeenCalled() + expect(mockGetModelDimension).toHaveBeenCalledWith("gemini", "gemini-embedding-001") expect(MockedQdrantVectorStore).toHaveBeenCalledWith( "/test/workspace", "http://localhost:6333", - 768, // Fixed dimension for Gemini + 3072, + "test-key", + ) + }) + + it("should use default model dimension for Gemini when modelId not specified", () => { + // Arrange + const testConfig = { + embedderProvider: "gemini", + qdrantUrl: "http://localhost:6333", + qdrantApiKey: "test-key", + } + mockConfigManager.getConfig.mockReturnValue(testConfig as any) + mockGetDefaultModelId.mockReturnValue("gemini-embedding-001") + mockGetModelDimension.mockReturnValue(3072) + + // Act + factory.createVectorStore() + + // Assert + expect(mockGetDefaultModelId).toHaveBeenCalledWith("gemini") + 
expect(mockGetModelDimension).toHaveBeenCalledWith("gemini", "gemini-embedding-001") + expect(MockedQdrantVectorStore).toHaveBeenCalledWith( + "/test/workspace", + "http://localhost:6333", + 3072, "test-key", ) }) diff --git a/src/services/code-index/embedders/__tests__/gemini.spec.ts b/src/services/code-index/embedders/__tests__/gemini.spec.ts index 378e6e7d95b..d41a4dc1e93 100644 --- a/src/services/code-index/embedders/__tests__/gemini.spec.ts +++ b/src/services/code-index/embedders/__tests__/gemini.spec.ts @@ -25,13 +25,30 @@ describe("GeminiEmbedder", () => { }) describe("constructor", () => { - it("should create an instance with correct fixed values passed to OpenAICompatibleEmbedder", () => { + it("should create an instance with default model when no model specified", () => { // Arrange const apiKey = "test-gemini-api-key" // Act embedder = new GeminiEmbedder(apiKey) + // Assert + expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith( + "https://generativelanguage.googleapis.com/v1beta/openai/", + apiKey, + "gemini-embedding-001", + 2048, + ) + }) + + it("should create an instance with specified model", () => { + // Arrange + const apiKey = "test-gemini-api-key" + const modelId = "text-embedding-004" + + // Act + embedder = new GeminiEmbedder(apiKey, modelId) + // Assert expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith( "https://generativelanguage.googleapis.com/v1beta/openai/", @@ -50,7 +67,7 @@ describe("GeminiEmbedder", () => { }) describe("embedderInfo", () => { - it("should return correct embedder info with dimension 768", () => { + it("should return correct embedder info", () => { // Arrange embedder = new GeminiEmbedder("test-api-key") @@ -61,7 +78,66 @@ describe("GeminiEmbedder", () => { expect(info).toEqual({ name: "gemini", }) - expect(GeminiEmbedder.dimension).toBe(768) + }) + + describe("createEmbeddings", () => { + let mockCreateEmbeddings: any + + beforeEach(() => { + mockCreateEmbeddings = vitest.fn() + 
MockedOpenAICompatibleEmbedder.prototype.createEmbeddings = mockCreateEmbeddings + }) + + it("should use instance model when no model parameter provided", async () => { + // Arrange + embedder = new GeminiEmbedder("test-api-key") + const texts = ["test text 1", "test text 2"] + const mockResponse = { + embeddings: [ + [0.1, 0.2], + [0.3, 0.4], + ], + } + mockCreateEmbeddings.mockResolvedValue(mockResponse) + + // Act + const result = await embedder.createEmbeddings(texts) + + // Assert + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "gemini-embedding-001") + expect(result).toEqual(mockResponse) + }) + + it("should use provided model parameter when specified", async () => { + // Arrange + embedder = new GeminiEmbedder("test-api-key", "text-embedding-004") + const texts = ["test text 1", "test text 2"] + const mockResponse = { + embeddings: [ + [0.1, 0.2], + [0.3, 0.4], + ], + } + mockCreateEmbeddings.mockResolvedValue(mockResponse) + + // Act + const result = await embedder.createEmbeddings(texts, "gemini-embedding-001") + + // Assert + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "gemini-embedding-001") + expect(result).toEqual(mockResponse) + }) + + it("should handle errors from OpenAICompatibleEmbedder", async () => { + // Arrange + embedder = new GeminiEmbedder("test-api-key") + const texts = ["test text"] + const error = new Error("Embedding failed") + mockCreateEmbeddings.mockRejectedValue(error) + + // Act & Assert + await expect(embedder.createEmbeddings(texts)).rejects.toThrow("Embedding failed") + }) }) }) diff --git a/src/services/code-index/embedders/gemini.ts b/src/services/code-index/embedders/gemini.ts index fcca4c0fdad..68f8fc867d1 100644 --- a/src/services/code-index/embedders/gemini.ts +++ b/src/services/code-index/embedders/gemini.ts @@ -7,33 +7,36 @@ import { TelemetryService } from "@roo-code/telemetry" /** * Gemini embedder implementation that wraps the OpenAI Compatible embedder - * with fixed configuration for Google's 
Gemini embedding API. + * with configuration for Google's Gemini embedding API. * - * Fixed values: - * - Base URL: https://generativelanguage.googleapis.com/v1beta/openai/ - * - Model: text-embedding-004 - * - Dimension: 768 + * Supported models: + * - text-embedding-004 (dimension: 768) + * - gemini-embedding-001 (dimension: 3072) + */ export class GeminiEmbedder implements IEmbedder { private readonly openAICompatibleEmbedder: OpenAICompatibleEmbedder private static readonly GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/" - private static readonly GEMINI_MODEL = "text-embedding-004" - private static readonly GEMINI_DIMENSION = 768 + private static readonly DEFAULT_MODEL = "gemini-embedding-001" + private readonly modelId: string /** * Creates a new Gemini embedder * @param apiKey The Gemini API key for authentication + * @param modelId The model ID to use (defaults to text-embedding-004) */ - constructor(apiKey: string) { + constructor(apiKey: string, modelId?: string) { if (!apiKey) { throw new Error(t("embeddings:validation.apiKeyRequired")) } - // Create an OpenAI Compatible embedder with Gemini's fixed configuration + // Use provided model or default + this.modelId = modelId || GeminiEmbedder.DEFAULT_MODEL + + // Create an OpenAI Compatible embedder with Gemini's configuration this.openAICompatibleEmbedder = new OpenAICompatibleEmbedder( GeminiEmbedder.GEMINI_BASE_URL, apiKey, - GeminiEmbedder.GEMINI_MODEL, + this.modelId, GEMINI_MAX_ITEM_TOKENS, ) } @@ -41,13 +44,14 @@ export class GeminiEmbedder implements IEmbedder { /** * Creates embeddings for the given texts using Gemini's embedding API * @param texts Array of text strings to embed - * @param model Optional model identifier (ignored - always uses text-embedding-004) + * @param model Optional model identifier (uses constructor model if not provided) * @returns Promise resolving to embedding response */ async createEmbeddings(texts: string[], model?: string): Promise { try { - 
// Always use the fixed Gemini model, ignoring any passed model parameter - return await this.openAICompatibleEmbedder.createEmbeddings(texts, GeminiEmbedder.GEMINI_MODEL) + // Use the provided model or fall back to the instance's model + const modelToUse = model || this.modelId + return await this.openAICompatibleEmbedder.createEmbeddings(texts, modelToUse) } catch (error) { TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { error: error instanceof Error ? error.message : String(error), @@ -85,11 +89,4 @@ export class GeminiEmbedder implements IEmbedder { name: "gemini", } } - - /** - * Gets the fixed dimension for Gemini embeddings - */ - static get dimension(): number { - return GeminiEmbedder.GEMINI_DIMENSION - } } diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts index a741aaf72a7..ec8b1e7ade7 100644 --- a/src/services/code-index/service-factory.ts +++ b/src/services/code-index/service-factory.ts @@ -63,7 +63,7 @@ export class CodeIndexServiceFactory { if (!config.geminiOptions?.apiKey) { throw new Error(t("embeddings:serviceFactory.geminiConfigMissing")) } - return new GeminiEmbedder(config.geminiOptions.apiKey) + return new GeminiEmbedder(config.geminiOptions.apiKey, config.modelId) } throw new Error( @@ -111,9 +111,6 @@ export class CodeIndexServiceFactory { // First check if a manual dimension is provided (works for all providers) if (config.modelDimension && config.modelDimension > 0) { vectorSize = config.modelDimension - } else if (provider === "gemini") { - // Gemini's text-embedding-004 has a fixed dimension of 768 - vectorSize = 768 } else { // Fall back to model-specific dimension from profiles vectorSize = getModelDimension(provider, modelId) diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index 4c6bc24319e..f387480c65d 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -48,6 +48,7 @@ export const 
EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { }, gemini: { "text-embedding-004": { dimension: 768 }, + "gemini-embedding-001": { dimension: 3072, scoreThreshold: 0.4 }, }, } @@ -134,7 +135,7 @@ export function getDefaultModelId(provider: EmbedderProvider): string { } case "gemini": - return "text-embedding-004" + return "gemini-embedding-001" default: // Fallback for unknown providers From 4c71f2786c081906727d47d0c1bba8fc7b11c046 Mon Sep 17 00:00:00 2001 From: Daniel <57051444+daniel-lxs@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:00:10 -0500 Subject: [PATCH 2/2] Update src/services/code-index/embedders/gemini.ts Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> --- src/services/code-index/embedders/gemini.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/code-index/embedders/gemini.ts b/src/services/code-index/embedders/gemini.ts index 68f8fc867d1..7e795875c9d 100644 --- a/src/services/code-index/embedders/gemini.ts +++ b/src/services/code-index/embedders/gemini.ts @@ -22,7 +22,7 @@ export class GeminiEmbedder implements IEmbedder { /** * Creates a new Gemini embedder * @param apiKey The Gemini API key for authentication - * @param modelId The model ID to use (defaults to text-embedding-004) + * @param modelId The model ID to use (defaults to gemini-embedding-001) */ constructor(apiKey: string, modelId?: string) { if (!apiKey) {