From 1819bd1c3990b45ae9bc45a208d69fab20fcbd49 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Mon, 28 Jul 2025 16:36:41 +0000 Subject: [PATCH 1/5] feat: add Vertex AI as embedder provider for codebase indexing - Add "vertex" to EmbedderProvider type - Add Vertex AI embedding models to EMBEDDING_MODEL_PROFILES - Create VertexEmbedder implementation using OpenAI-compatible approach - Update service factory to handle vertex provider - Add vertexOptions to CodeIndexConfig interface - Update CodeIndexPopover UI to include Vertex AI section - Add translation keys for Vertex AI - Add VERTEX_MAX_ITEM_TOKENS constant - Add comprehensive tests for VertexEmbedder Closes #6300 --- src/i18n/locales/en/embeddings.json | 1 + src/services/code-index/constants/index.ts | 3 + .../embedders/__tests__/vertex.spec.ts | 193 ++++++++++++++++++ src/services/code-index/embedders/vertex.ts | 94 +++++++++ src/services/code-index/interfaces/config.ts | 2 + .../code-index/interfaces/embedder.ts | 2 +- src/services/code-index/service-factory.ts | 6 + src/shared/embeddingModels.ts | 11 +- .../src/components/chat/CodeIndexPopover.tsx | 85 +++++++- webview-ui/src/i18n/locales/en/settings.json | 4 + 10 files changed, 398 insertions(+), 3 deletions(-) create mode 100644 src/services/code-index/embedders/__tests__/vertex.spec.ts create mode 100644 src/services/code-index/embedders/vertex.ts diff --git a/src/i18n/locales/en/embeddings.json b/src/i18n/locales/en/embeddings.json index 66465d8c35..c2b31e4b9b 100644 --- a/src/i18n/locales/en/embeddings.json +++ b/src/i18n/locales/en/embeddings.json @@ -47,6 +47,7 @@ "openAiCompatibleConfigMissing": "OpenAI Compatible configuration missing for embedder creation", "geminiConfigMissing": "Gemini configuration missing for embedder creation", "mistralConfigMissing": "Mistral configuration missing for embedder creation", + "vertexConfigMissing": "Vertex AI configuration missing for embedder creation", "invalidEmbedderType": "Invalid embedder type configured: {{embedderProvider}}", "vectorDimensionNotDeterminedOpenAiCompatible": "Could not determine vector dimension for model '{{modelId}}' with provider '{{provider}}'. Please ensure the 'Embedding Dimension' is correctly set in the OpenAI-Compatible provider settings.", "vectorDimensionNotDetermined": "Could not determine vector dimension for model '{{modelId}}' with provider '{{provider}}'. 
Check model profiles or configuration.", diff --git a/src/services/code-index/constants/index.ts b/src/services/code-index/constants/index.ts index 6f0e0fe7e6..1356b6b2f6 100644 --- a/src/services/code-index/constants/index.ts +++ b/src/services/code-index/constants/index.ts @@ -29,3 +29,6 @@ export const BATCH_PROCESSING_CONCURRENCY = 10 /**Gemini Embedder */ export const GEMINI_MAX_ITEM_TOKENS = 2048 + +/**Vertex AI Embedder */ +export const VERTEX_MAX_ITEM_TOKENS = 2048 diff --git a/src/services/code-index/embedders/__tests__/vertex.spec.ts b/src/services/code-index/embedders/__tests__/vertex.spec.ts new file mode 100644 index 0000000000..743bd3695a --- /dev/null +++ b/src/services/code-index/embedders/__tests__/vertex.spec.ts @@ -0,0 +1,193 @@ +import { vitest, describe, it, expect, beforeEach } from "vitest" +import type { MockedClass } from "vitest" +import { VertexEmbedder } from "../vertex" +import { OpenAICompatibleEmbedder } from "../openai-compatible" + +// Mock the OpenAICompatibleEmbedder +vitest.mock("../openai-compatible") + +// Mock TelemetryService +vitest.mock("@roo-code/telemetry", () => ({ + TelemetryService: { + instance: { + captureEvent: vitest.fn(), + }, + }, +})) + +const MockedOpenAICompatibleEmbedder = OpenAICompatibleEmbedder as MockedClass + +describe("VertexEmbedder", () => { + let embedder: VertexEmbedder + + beforeEach(() => { + vitest.clearAllMocks() + }) + + describe("constructor", () => { + it("should create an instance with default model when no model specified", () => { + // Arrange + const apiKey = "test-vertex-api-key" + + // Act + embedder = new VertexEmbedder(apiKey) + + // Assert + expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith( + "https://generativelanguage.googleapis.com/v1beta/openai/", + apiKey, + "text-embedding-004", + 2048, + ) + }) + + it("should create an instance with specified model", () => { + // Arrange + const apiKey = "test-vertex-api-key" + const modelId = "text-multilingual-embedding-002" + + // Act + embedder = new VertexEmbedder(apiKey, modelId) + + // Assert + expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith( + "https://generativelanguage.googleapis.com/v1beta/openai/", + apiKey, + "text-multilingual-embedding-002", + 2048, + ) + }) + + it("should throw error when API key is not provided", () => { + // Act & Assert + expect(() => new VertexEmbedder("")).toThrow("validation.apiKeyRequired") + expect(() => new VertexEmbedder(null as any)).toThrow("validation.apiKeyRequired") + expect(() => new VertexEmbedder(undefined as any)).toThrow("validation.apiKeyRequired") + }) + }) + + describe("embedderInfo", () => { + it("should return correct embedder info", () => { + // Arrange + embedder = new VertexEmbedder("test-api-key") + + // Act + const info = embedder.embedderInfo + + // Assert + expect(info).toEqual({ + name: "vertex", + }) + }) + + describe("createEmbeddings", () => { + let mockCreateEmbeddings: any + + beforeEach(() => { + mockCreateEmbeddings = vitest.fn() + MockedOpenAICompatibleEmbedder.prototype.createEmbeddings = mockCreateEmbeddings + }) + + it("should use instance model when no model parameter provided", async () => { + // Arrange + embedder = new VertexEmbedder("test-api-key") + const texts = ["test text 1", "test text 2"] + const mockResponse = { + embeddings: [ + [0.1, 0.2], + [0.3, 0.4], + ], + } + mockCreateEmbeddings.mockResolvedValue(mockResponse) + + // Act + const result = await embedder.createEmbeddings(texts) + + // Assert + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, 
"text-embedding-004") + expect(result).toEqual(mockResponse) + }) + + it("should use provided model parameter when specified", async () => { + // Arrange + embedder = new VertexEmbedder("test-api-key", "textembedding-gecko@003") + const texts = ["test text 1", "test text 2"] + const mockResponse = { + embeddings: [ + [0.1, 0.2], + [0.3, 0.4], + ], + } + mockCreateEmbeddings.mockResolvedValue(mockResponse) + + // Act + const result = await embedder.createEmbeddings(texts, "text-multilingual-embedding-002") + + // Assert + expect(mockCreateEmbeddings).toHaveBeenCalledWith(texts, "text-multilingual-embedding-002") + expect(result).toEqual(mockResponse) + }) + + it("should handle errors from OpenAICompatibleEmbedder", async () => { + // Arrange + embedder = new VertexEmbedder("test-api-key") + const texts = ["test text"] + const error = new Error("Embedding failed") + mockCreateEmbeddings.mockRejectedValue(error) + + // Act & Assert + await expect(embedder.createEmbeddings(texts)).rejects.toThrow("Embedding failed") + }) + }) + }) + + describe("validateConfiguration", () => { + let mockValidateConfiguration: any + + beforeEach(() => { + mockValidateConfiguration = vitest.fn() + MockedOpenAICompatibleEmbedder.prototype.validateConfiguration = mockValidateConfiguration + }) + + it("should delegate validation to OpenAICompatibleEmbedder", async () => { + // Arrange + embedder = new VertexEmbedder("test-api-key") + mockValidateConfiguration.mockResolvedValue({ valid: true }) + + // Act + const result = await embedder.validateConfiguration() + + // Assert + expect(mockValidateConfiguration).toHaveBeenCalled() + expect(result).toEqual({ valid: true }) + }) + + it("should pass through validation errors from OpenAICompatibleEmbedder", async () => { + // Arrange + embedder = new VertexEmbedder("test-api-key") + mockValidateConfiguration.mockResolvedValue({ + valid: false, + error: "embeddings:validation.authenticationFailed", + }) + + // Act + const result = await embedder.validateConfiguration() + + // Assert + expect(mockValidateConfiguration).toHaveBeenCalled() + expect(result).toEqual({ + valid: false, + error: "embeddings:validation.authenticationFailed", + }) + }) + + it("should handle validation exceptions", async () => { + // Arrange + embedder = new VertexEmbedder("test-api-key") + mockValidateConfiguration.mockRejectedValue(new Error("Validation failed")) + + // Act & Assert + await expect(embedder.validateConfiguration()).rejects.toThrow("Validation failed") + }) + }) +}) diff --git a/src/services/code-index/embedders/vertex.ts b/src/services/code-index/embedders/vertex.ts new file mode 100644 index 0000000000..adc58d5da6 --- /dev/null +++ b/src/services/code-index/embedders/vertex.ts @@ -0,0 +1,94 @@ +import { OpenAICompatibleEmbedder } from "./openai-compatible" +import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces/embedder" +import { VERTEX_MAX_ITEM_TOKENS } from "../constants" +import { t } from "../../../i18n" +import { TelemetryEventName } from "@roo-code/types" +import { TelemetryService } from "@roo-code/telemetry" + +/** + * Vertex AI embedder implementation that wraps the OpenAI Compatible embedder + * with configuration for Google's Vertex AI embedding API. 
+ * + * Supported models: + * - text-embedding-004 (dimension: 768) + * - text-multilingual-embedding-002 (dimension: 768) + * - textembedding-gecko@003 (dimension: 768) + * - textembedding-gecko-multilingual@001 (dimension: 768) + */ +export class VertexEmbedder implements IEmbedder { + private readonly openAICompatibleEmbedder: OpenAICompatibleEmbedder + private static readonly VERTEX_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/" + private static readonly DEFAULT_MODEL = "text-embedding-004" + private readonly modelId: string + + /** + * Creates a new Vertex AI embedder + * @param apiKey The Google AI API key for authentication + * @param modelId The model ID to use (defaults to text-embedding-004) + */ + constructor(apiKey: string, modelId?: string) { + if (!apiKey) { + throw new Error(t("embeddings:validation.apiKeyRequired")) + } + + // Use provided model or default + this.modelId = modelId || VertexEmbedder.DEFAULT_MODEL + + // Create an OpenAI Compatible embedder with Vertex AI's configuration + this.openAICompatibleEmbedder = new OpenAICompatibleEmbedder( + VertexEmbedder.VERTEX_BASE_URL, + apiKey, + this.modelId, + VERTEX_MAX_ITEM_TOKENS, + ) + } + + /** + * Creates embeddings for the given texts using Vertex AI's embedding API + * @param texts Array of text strings to embed + * @param model Optional model identifier (uses constructor model if not provided) + * @returns Promise resolving to embedding response + */ + async createEmbeddings(texts: string[], model?: string): Promise { + try { + // Use the provided model or fall back to the instance's model + const modelToUse = model || this.modelId + return await this.openAICompatibleEmbedder.createEmbeddings(texts, modelToUse) + } catch (error) { + TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { + error: error instanceof Error ? error.message : String(error), + stack: error instanceof Error ? error.stack : undefined, + location: "VertexEmbedder:createEmbeddings", + }) + throw error + } + } + + /** + * Validates the Vertex AI embedder configuration by delegating to the underlying OpenAI-compatible embedder + * @returns Promise resolving to validation result with success status and optional error message + */ + async validateConfiguration(): Promise<{ valid: boolean; error?: string }> { + try { + // Delegate validation to the OpenAI-compatible embedder + // The error messages will be specific to Vertex AI since we're using Vertex AI's base URL + return await this.openAICompatibleEmbedder.validateConfiguration() + } catch (error) { + TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { + error: error instanceof Error ? error.message : String(error), + stack: error instanceof Error ? 
error.stack : undefined, + location: "VertexEmbedder:validateConfiguration", + }) + throw error + } + } + + /** + * Returns information about this embedder + */ + get embedderInfo(): EmbedderInfo { + return { + name: "vertex", + } + } +} diff --git a/src/services/code-index/interfaces/config.ts b/src/services/code-index/interfaces/config.ts index 9098a60091..32a0085d22 100644 --- a/src/services/code-index/interfaces/config.ts +++ b/src/services/code-index/interfaces/config.ts @@ -14,6 +14,7 @@ export interface CodeIndexConfig { openAiCompatibleOptions?: { baseUrl: string; apiKey: string } geminiOptions?: { apiKey: string } mistralOptions?: { apiKey: string } + vertexOptions?: { apiKey: string } qdrantUrl?: string qdrantApiKey?: string searchMinScore?: number @@ -35,6 +36,7 @@ export type PreviousConfigSnapshot = { openAiCompatibleApiKey?: string geminiApiKey?: string mistralApiKey?: string + vertexApiKey?: string qdrantUrl?: string qdrantApiKey?: string } diff --git a/src/services/code-index/interfaces/embedder.ts b/src/services/code-index/interfaces/embedder.ts index c5653ea2b7..3ea92a3c77 100644 --- a/src/services/code-index/interfaces/embedder.ts +++ b/src/services/code-index/interfaces/embedder.ts @@ -28,7 +28,7 @@ export interface EmbeddingResponse { } } -export type AvailableEmbedders = "openai" | "ollama" | "openai-compatible" | "gemini" | "mistral" +export type AvailableEmbedders = "openai" | "ollama" | "openai-compatible" | "gemini" | "mistral" | "vertex" export interface EmbedderInfo { name: AvailableEmbedders diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts index 68b0f5c0bc..d0a60c44fd 100644 --- a/src/services/code-index/service-factory.ts +++ b/src/services/code-index/service-factory.ts @@ -4,6 +4,7 @@ import { CodeIndexOllamaEmbedder } from "./embedders/ollama" import { OpenAICompatibleEmbedder } from "./embedders/openai-compatible" import { GeminiEmbedder } from "./embedders/gemini" import { MistralEmbedder } from "./embedders/mistral" +import { VertexEmbedder } from "./embedders/vertex" import { EmbedderProvider, getDefaultModelId, getModelDimension } from "../../shared/embeddingModels" import { QdrantVectorStore } from "./vector-store/qdrant-client" import { codeParser, DirectoryScanner, FileWatcher } from "./processors" @@ -70,6 +71,11 @@ export class CodeIndexServiceFactory { throw new Error(t("embeddings:serviceFactory.mistralConfigMissing")) } return new MistralEmbedder(config.mistralOptions.apiKey, config.modelId) + } else if (provider === "vertex") { + if (!config.vertexOptions?.apiKey) { + throw new Error(t("embeddings:serviceFactory.vertexConfigMissing")) + } + return new VertexEmbedder(config.vertexOptions.apiKey, config.modelId) } throw new Error( diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index a3cd61e659..b14ab68981 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -2,7 +2,7 @@ * Defines profiles for different embedding models, including their dimensions. 
*/ -export type EmbedderProvider = "openai" | "ollama" | "openai-compatible" | "gemini" | "mistral" // Add other providers as needed +export type EmbedderProvider = "openai" | "ollama" | "openai-compatible" | "gemini" | "mistral" | "vertex" // Add other providers as needed export interface EmbeddingModelProfile { dimension: number @@ -53,6 +53,12 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { mistral: { "codestral-embed-2505": { dimension: 1536, scoreThreshold: 0.4 }, }, + vertex: { + "text-embedding-004": { dimension: 768, scoreThreshold: 0.4 }, + "text-multilingual-embedding-002": { dimension: 768, scoreThreshold: 0.4 }, + "textembedding-gecko@003": { dimension: 768, scoreThreshold: 0.4 }, + "textembedding-gecko-multilingual@001": { dimension: 768, scoreThreshold: 0.4 }, + }, } /** @@ -143,6 +149,9 @@ export function getDefaultModelId(provider: EmbedderProvider): string { case "mistral": return "codestral-embed-2505" + case "vertex": + return "text-embedding-004" + default: // Fallback for unknown providers console.warn(`Unknown provider for default model ID: ${provider}. Falling back to OpenAI default.`) diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index d7683e8c7e..8ee7fd8823 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -69,6 +69,7 @@ interface LocalCodeIndexSettings { codebaseIndexOpenAiCompatibleApiKey?: string codebaseIndexGeminiApiKey?: string codebaseIndexMistralApiKey?: string + codebaseIndexVertexApiKey?: string } // Validation schema for codebase index settings @@ -135,6 +136,14 @@ const createValidationSchema = (provider: EmbedderProvider, t: any) => { .min(1, t("settings:codeIndex.validation.modelSelectionRequired")), }) + case "vertex": + return baseSchema.extend({ + codebaseIndexVertexApiKey: z.string().min(1, t("settings:codeIndex.validation.vertexApiKeyRequired")), + codebaseIndexEmbedderModelId: z + .string() + .min(1, t("settings:codeIndex.validation.modelSelectionRequired")), + }) + default: return baseSchema } @@ -179,6 +188,7 @@ export const CodeIndexPopover: React.FC = ({ codebaseIndexOpenAiCompatibleApiKey: "", codebaseIndexGeminiApiKey: "", codebaseIndexMistralApiKey: "", + codebaseIndexVertexApiKey: "", }) // Initial settings state - stores the settings when popover opens @@ -213,6 +223,7 @@ export const CodeIndexPopover: React.FC = ({ codebaseIndexOpenAiCompatibleApiKey: "", codebaseIndexGeminiApiKey: "", codebaseIndexMistralApiKey: "", + codebaseIndexVertexApiKey: "", } setInitialSettings(settings) setCurrentSettings(settings) @@ -307,6 +318,9 @@ export const CodeIndexPopover: React.FC = ({ if (!prev.codebaseIndexMistralApiKey || prev.codebaseIndexMistralApiKey === SECRET_PLACEHOLDER) { updated.codebaseIndexMistralApiKey = secretStatus.hasMistralApiKey ? SECRET_PLACEHOLDER : "" } + if (!prev.codebaseIndexVertexApiKey || prev.codebaseIndexVertexApiKey === SECRET_PLACEHOLDER) { + updated.codebaseIndexVertexApiKey = secretStatus.hasVertexApiKey ? 
SECRET_PLACEHOLDER : "" + } return updated } @@ -379,7 +393,8 @@ export const CodeIndexPopover: React.FC = ({ key === "codeIndexOpenAiKey" || key === "codebaseIndexOpenAiCompatibleApiKey" || key === "codebaseIndexGeminiApiKey" || - key === "codebaseIndexMistralApiKey" + key === "codebaseIndexMistralApiKey" || + key === "codebaseIndexVertexApiKey" ) { dataToValidate[key] = "placeholder-valid" } @@ -624,6 +639,9 @@ export const CodeIndexPopover: React.FC = ({ {t("settings:codeIndex.mistralProvider")} + + {t("settings:codeIndex.vertexProvider")} + @@ -1016,6 +1034,71 @@ export const CodeIndexPopover: React.FC = ({ )} + {currentSettings.codebaseIndexEmbedderProvider === "vertex" && ( + <> +
+ + + updateSetting("codebaseIndexVertexApiKey", e.target.value) + } + placeholder={t("settings:codeIndex.vertexApiKeyPlaceholder")} + className={cn("w-full", { + "border-red-500": formErrors.codebaseIndexVertexApiKey, + })} + /> + {formErrors.codebaseIndexVertexApiKey && ( +

+ {formErrors.codebaseIndexVertexApiKey} +

+ )} +
+ +
+ + + updateSetting("codebaseIndexEmbedderModelId", e.target.value) + } + className={cn("w-full", { + "border-red-500": formErrors.codebaseIndexEmbedderModelId, + })}> + + {t("settings:codeIndex.selectModel")} + + {getAvailableModels().map((modelId) => { + const model = + codebaseIndexModels?.[ + currentSettings.codebaseIndexEmbedderProvider + ]?.[modelId] + return ( + + {modelId}{" "} + {model + ? t("settings:codeIndex.modelDimensions", { + dimension: model.dimension, + }) + : ""} + + ) + })} + + {formErrors.codebaseIndexEmbedderModelId && ( +

+ {formErrors.codebaseIndexEmbedderModelId} +

+ )} +
+ + )} + {/* Qdrant Settings */}
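
A minimal usage sketch of the embedder introduced by this patch, relying only on the API surface added above (the VertexEmbedder constructor, validateConfiguration, createEmbeddings, and embedderInfo). The import path and the indexWithVertex helper are illustrative and not part of the diff; the service factory change above performs the equivalent wiring when the configured provider is "vertex".

import { VertexEmbedder } from "./services/code-index/embedders/vertex"

async function indexWithVertex(apiKey: string, chunks: string[]): Promise<number[][]> {
	// Model defaults to "text-embedding-004"; any model listed under
	// EMBEDDING_MODEL_PROFILES.vertex in this patch may be passed instead.
	const embedder = new VertexEmbedder(apiKey, "text-multilingual-embedding-002")

	// Delegates to OpenAICompatibleEmbedder and resolves to { valid, error? }.
	const { valid, error } = await embedder.validateConfiguration()
	if (!valid) {
		throw new Error(error ?? "Vertex AI embedder configuration is invalid")
	}

	// The optional second argument of createEmbeddings overrides the constructor model.
	const { embeddings } = await embedder.createEmbeddings(chunks)
	console.log(embedder.embedderInfo.name, embeddings.length) // "vertex", chunks.length
	return embeddings
}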