packages/types/src/codebase-index.ts (4 additions, 1 deletion)
@@ -21,7 +21,9 @@ export const CODEBASE_INDEX_DEFAULTS = {
 export const codebaseIndexConfigSchema = z.object({
 	codebaseIndexEnabled: z.boolean().optional(),
 	codebaseIndexQdrantUrl: z.string().optional(),
-	codebaseIndexEmbedderProvider: z.enum(["openai", "ollama", "openai-compatible", "gemini"]).optional(),
+	codebaseIndexVectorStoreType: z.enum(["qdrant", "local"]).optional(),
+	codebaseIndexLocalVectorStorePath: z.string().optional(),
+	codebaseIndexEmbedderProvider: z.enum(["openai", "ollama", "openai-compatible", "gemini", "fastembed"]).optional(),
 	codebaseIndexEmbedderBaseUrl: z.string().optional(),
 	codebaseIndexEmbedderModelId: z.string().optional(),
 	codebaseIndexEmbedderModelDimension: z.number().optional(),
@@ -47,6 +49,7 @@ export const codebaseIndexModelsSchema = z.object({
 	ollama: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 	"openai-compatible": z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 	gemini: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
+	fastembed: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 })

 export type CodebaseIndexModels = z.infer<typeof codebaseIndexModelsSchema>
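
For context, a minimal sketch of the extended schema in use. This is illustrative only: the import path and the sample values are assumptions, not part of this PR.

// Hedged sketch: parsing a settings object that uses the new vector-store and
// fastembed fields. Import path and sample values are assumed for illustration.
import { codebaseIndexConfigSchema } from "@roo-code/types"

const result = codebaseIndexConfigSchema.safeParse({
	codebaseIndexEnabled: true,
	codebaseIndexVectorStoreType: "local", // new: "qdrant" | "local"
	codebaseIndexLocalVectorStorePath: "/path/to/local-index", // new, optional
	codebaseIndexEmbedderProvider: "fastembed", // newly allowed enum value
})

if (!result.success) {
	console.error(result.error.issues)
}
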
pnpm-lock.yaml (3,107 additions, 35 deletions)

Large diffs are not rendered by default.

src/package.json (2 additions, 0 deletions)
@@ -392,6 +392,8 @@
"@aws-sdk/credential-providers": "^3.806.0",
"@google/genai": "^1.0.0",
"@lmstudio/sdk": "^1.1.1",
"@mastra/fastembed": "^0.10.1",
"@mastra/libsql": "^0.11.0",
"@mistralai/mistralai": "^1.3.6",
"@modelcontextprotocol/sdk": "^1.9.0",
"@qdrant/js-client-rest": "^1.14.0",
src/services/code-index/config-manager.ts (21 additions, 9 deletions)
@@ -20,6 +20,8 @@ export class CodeIndexConfigManager {
 	private geminiOptions?: { apiKey: string }
 	private qdrantUrl?: string = "http://localhost:6333"
 	private qdrantApiKey?: string
+	private vectorStoreType?: "qdrant" | "local"
+	private localVectorStorePath?: string
 	private searchMinScore?: number
 	private searchMaxResults?: number

@@ -54,6 +56,8 @@
 		const {
 			codebaseIndexEnabled,
 			codebaseIndexQdrantUrl,
+			codebaseIndexVectorStoreType,
+			codebaseIndexLocalVectorStorePath,
 			codebaseIndexEmbedderProvider,
 			codebaseIndexEmbedderBaseUrl,
 			codebaseIndexEmbedderModelId,
@@ -72,6 +76,8 @@
 		this.codebaseIndexEnabled = codebaseIndexEnabled ?? true
 		this.qdrantUrl = codebaseIndexQdrantUrl
 		this.qdrantApiKey = qdrantApiKey ?? ""
+		this.vectorStoreType = codebaseIndexVectorStoreType as "qdrant" | "local" | undefined
[Contributor review comment on the line above] The config manager now reads new fields for vectorStoreType and localVectorStorePath. Consider validating that when vectorStoreType is 'local', a valid localVectorStorePath is provided or a sensible default is applied. (A hedged sketch of one way to do this follows this file's diff.)

+		this.localVectorStorePath = codebaseIndexLocalVectorStorePath
 		this.searchMinScore = codebaseIndexSearchMinScore
 		this.searchMaxResults = codebaseIndexSearchMaxResults

@@ -93,13 +99,15 @@

 		this.openAiOptions = { openAiNativeApiKey: openAiKey }

-		// Set embedder provider with support for openai-compatible
+		// Set embedder provider with support for openai-compatible and fastembed
 		if (codebaseIndexEmbedderProvider === "ollama") {
 			this.embedderProvider = "ollama"
 		} else if (codebaseIndexEmbedderProvider === "openai-compatible") {
 			this.embedderProvider = "openai-compatible"
 		} else if (codebaseIndexEmbedderProvider === "gemini") {
 			this.embedderProvider = "gemini"
+		} else if (codebaseIndexEmbedderProvider === "fastembed") {
+			this.embedderProvider = "fastembed"
 		} else {
 			this.embedderProvider = "openai"
 		}
@@ -188,26 +196,28 @@
 	 * Checks if the service is properly configured based on the embedder type.
 	 */
 	public isConfigured(): boolean {
+		// Check if we have a vector store configured (either Qdrant or local)
+		const hasVectorStore = this.qdrantUrl || this.vectorStoreType === "local"
+
 		if (this.embedderProvider === "openai") {
 			const openAiKey = this.openAiOptions?.openAiNativeApiKey
-			const qdrantUrl = this.qdrantUrl
-			return !!(openAiKey && qdrantUrl)
+			return !!(openAiKey && hasVectorStore)
 		} else if (this.embedderProvider === "ollama") {
 			// Ollama model ID has a default, so only base URL is strictly required for config
 			const ollamaBaseUrl = this.ollamaOptions?.ollamaBaseUrl
-			const qdrantUrl = this.qdrantUrl
-			return !!(ollamaBaseUrl && qdrantUrl)
+			return !!(ollamaBaseUrl && hasVectorStore)
 		} else if (this.embedderProvider === "openai-compatible") {
 			const baseUrl = this.openAiCompatibleOptions?.baseUrl
 			const apiKey = this.openAiCompatibleOptions?.apiKey
-			const qdrantUrl = this.qdrantUrl
-			const isConfigured = !!(baseUrl && apiKey && qdrantUrl)
+			const isConfigured = !!(baseUrl && apiKey && hasVectorStore)
 			return isConfigured
 		} else if (this.embedderProvider === "gemini") {
 			const apiKey = this.geminiOptions?.apiKey
-			const qdrantUrl = this.qdrantUrl
-			const isConfigured = !!(apiKey && qdrantUrl)
+			const isConfigured = !!(apiKey && hasVectorStore)
 			return isConfigured
+		} else if (this.embedderProvider === "fastembed") {
+			// FastEmbed is local and doesn't require API keys, just a vector store
+			return !!hasVectorStore
 		}
 		return false // Should not happen if embedderProvider is always set correctly
 	}
@@ -353,6 +363,8 @@
 			geminiOptions: this.geminiOptions,
 			qdrantUrl: this.qdrantUrl,
 			qdrantApiKey: this.qdrantApiKey,
+			vectorStoreType: this.vectorStoreType,
+			localVectorStorePath: this.localVectorStorePath,
 			searchMinScore: this.currentSearchMinScore,
 			searchMaxResults: this.currentSearchMaxResults,
 		}
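
Regarding the contributor comment above, one possible shape for the suggested fallback is sketched below. This is not code from the PR: the helper name, the globalStoragePath argument, and the default file name are all assumptions.

import * as path from "path"

// Hedged sketch only: apply a sensible default when the local vector store is
// selected but no path is configured. Helper name and parameters are hypothetical.
function resolveLocalVectorStorePath(
	vectorStoreType: "qdrant" | "local" | undefined,
	configuredPath: string | undefined,
	globalStoragePath: string,
): string | undefined {
	if (vectorStoreType !== "local") return undefined
	// Prefer an explicitly configured path; otherwise fall back to a per-extension default.
	return configuredPath && configuredPath.trim().length > 0
		? configuredPath
		: path.join(globalStoragePath, "code-index", "local-vector-store.db")
}
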
src/services/code-index/embedders/__tests__/fastembed.spec.ts (243 additions, 0 deletions, new file)
@@ -0,0 +1,243 @@
// npx vitest services/code-index/embedders/__tests__/fastembed.spec.ts

import { describe, it, expect, beforeEach, vi } from "vitest"
import { FastEmbedEmbedder } from "../fastembed"

// Mock TelemetryService
vi.mock("@roo-code/telemetry", () => ({
TelemetryService: {
instance: {
captureEvent: vi.fn(),
},
},
}))

// Mock i18n
vi.mock("../../../i18n", () => ({
t: vi.fn((key: string, params?: any) => {
if (key === "embeddings:fastembed.modelNotSupported") {
return `Model "${params?.model}" not supported. Available models: ${params?.availableModels}`
}
if (key === "embeddings:fastembed.embeddingFailed") {
return `Failed to create embeddings with FastEmbed: ${params?.message}`
}
if (key === "embeddings:fastembed.noValidTexts") {
return "No valid texts to embed"
}
if (key === "embeddings:fastembed.invalidResponseFormat") {
return "Invalid response format from FastEmbed"
}
if (key === "embeddings:fastembed.invalidEmbeddingFormat") {
return "Invalid embedding format from FastEmbed"
}
return key
}),
}))

// Mock getModelQueryPrefix
vi.mock("../../../shared/embeddingModels", () => ({
getModelQueryPrefix: vi.fn(() => null),
}))

// Mock @mastra/fastembed
vi.mock("@mastra/fastembed", () => ({
fastembed: {
small: {
doEmbed: vi.fn(),
maxEmbeddingsPerCall: 256,
},
base: {
doEmbed: vi.fn(),
maxEmbeddingsPerCall: 256,
},
},
}))

describe("FastEmbedEmbedder", () => {
let embedder: FastEmbedEmbedder
let mockSmallDoEmbed: any
let mockBaseDoEmbed: any

beforeEach(() => {
vi.clearAllMocks()

// Get references to the mocked functions
const { fastembed } = require("@mastra/fastembed")
mockSmallDoEmbed = fastembed.small.doEmbed
mockBaseDoEmbed = fastembed.base.doEmbed
})

describe("constructor", () => {
it("should initialize with default model (bge-small-en-v1.5)", () => {
embedder = new FastEmbedEmbedder({})
expect(embedder.embedderInfo.name).toBe("fastembed")
})

it("should initialize with specified model", () => {
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
expect(embedder.embedderInfo.name).toBe("fastembed")
})

it("should use fallback model for unsupported model", () => {
const consoleSpy = vi.spyOn(console, "warn").mockImplementation(() => {})

embedder = new FastEmbedEmbedder({ fastEmbedModel: "unsupported-model" })

expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Model "unsupported-model" not available'))

consoleSpy.mockRestore()
})
})

describe("createEmbeddings", () => {
beforeEach(() => {
embedder = new FastEmbedEmbedder({})
})

it("should create embeddings for single text using small model", async () => {
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)

const result = await embedder.createEmbeddings(["test text"])

expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: ["test text"] })
expect(result).toEqual({
embeddings: mockEmbeddings,
})
})

it("should create embeddings for multiple texts using small model", async () => {
const mockEmbeddings = [
[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
]
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)

const result = await embedder.createEmbeddings(["text 1", "text 2"])

expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: ["text 1", "text 2"] })
expect(result).toEqual({
embeddings: mockEmbeddings,
})
})

it("should create embeddings using base model when specified", async () => {
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
mockBaseDoEmbed.mockResolvedValue(mockEmbeddings)

const result = await embedder.createEmbeddings(["test text"])

expect(mockBaseDoEmbed).toHaveBeenCalledWith({ values: ["test text"] })
expect(result).toEqual({
embeddings: mockEmbeddings,
})
})

it("should handle empty input", async () => {
const result = await embedder.createEmbeddings([])

expect(mockSmallDoEmbed).not.toHaveBeenCalled()
expect(result).toEqual({
embeddings: [],
})
})

it("should handle FastEmbed API errors", async () => {
const error = new Error("FastEmbed API error")
mockSmallDoEmbed.mockRejectedValue(error)

await expect(embedder.createEmbeddings(["test text"])).rejects.toThrow(
"Failed to create embeddings with FastEmbed: FastEmbed API error",
)
})

it("should process large batches correctly", async () => {
const texts = Array.from({ length: 150 }, (_, i) => `text ${i}`)
const mockEmbeddings = texts.map((_, i) => [i * 0.1, i * 0.2, i * 0.3, i * 0.4])
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)

const result = await embedder.createEmbeddings(texts)

expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: texts })
expect(result.embeddings).toHaveLength(150)
})
})

describe("validateConfiguration", () => {
beforeEach(() => {
embedder = new FastEmbedEmbedder({})
})

it("should validate successfully with small model", async () => {
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)

const result = await embedder.validateConfiguration()

expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: ["test"] })
expect(result).toEqual({ valid: true })
})

it("should validate successfully with base model", async () => {
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
mockBaseDoEmbed.mockResolvedValue(mockEmbeddings)

const result = await embedder.validateConfiguration()

expect(mockBaseDoEmbed).toHaveBeenCalledWith({ values: ["test"] })
expect(result).toEqual({ valid: true })
})

it("should return invalid when FastEmbed fails", async () => {
const error = new Error("FastEmbed validation error")
mockSmallDoEmbed.mockRejectedValue(error)

const result = await embedder.validateConfiguration()

expect(result).toEqual({
valid: false,
error: "FastEmbed validation failed: FastEmbed validation error",
})
})

it("should handle unexpected validation errors", async () => {
mockSmallDoEmbed.mockRejectedValue("Unexpected error")

const result = await embedder.validateConfiguration()

expect(result).toEqual({
valid: false,
error: "FastEmbed validation failed: Unexpected error",
})
})
})

describe("embedderInfo", () => {
it("should return correct embedder info", () => {
embedder = new FastEmbedEmbedder({})
expect(embedder.embedderInfo).toEqual({
name: "fastembed",
})
})
})

describe("model selection", () => {
it("should use small model by default", () => {
embedder = new FastEmbedEmbedder({})
// We can't directly test the private property, but we can test the behavior
expect(() => embedder).not.toThrow()
})

it("should use base model when specified", () => {
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
expect(() => embedder).not.toThrow()
})

it("should use small model when explicitly specified", () => {
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-small-en-v1.5" })
expect(() => embedder).not.toThrow()
})
})
})
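
Taken together, the tests above imply the following usage pattern for the new embedder. This is a hedged sketch inferred from the test expectations, not code from the PR; the import path mirrors the one used in the test file.

import { FastEmbedEmbedder } from "../fastembed"

// Hedged usage sketch inferred from the tests above.
async function embedWithFastEmbed(): Promise<void> {
	// Defaults to bge-small-en-v1.5; "bge-base-en-v1.5" is the other supported model.
	const embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })

	// validateConfiguration() performs a one-item test embed and reports success or failure.
	const validation = await embedder.validateConfiguration()
	if (!validation.valid) throw new Error(validation.error ?? "FastEmbed validation failed")

	// createEmbeddings() returns { embeddings: number[][] } for the given texts.
	const { embeddings } = await embedder.createEmbeddings(["function add(a, b) { return a + b }"])
	console.log(embedder.embedderInfo.name, embeddings[0]?.length) // "fastembed", embedding dimension
}
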