Skip to content

Commit b2e8141

Browse files
committed
feat: implement local vector store and embedding capabilities
- Add LibSQLVectorStore implementation using @mastra/libsql for local SQLite-based vector storage
- Add FastEmbedEmbedder implementation using @mastra/fastembed for local CPU-based embeddings
- Support bge-small-en-v1.5 and bge-base-en-v1.5 embedding models
- Update configuration system to support "local" vector store type and "fastembed" embedder provider
- Add comprehensive test coverage for new implementations
- Maintain backward compatibility with existing Qdrant and OpenAI integrations
- Enable zero-cost, privacy-focused code indexing without external dependencies

Resolves #5682
1 parent a163053 commit b2e8141

File tree

16 files changed

+4337
-53
lines changed

16 files changed

+4337
-53
lines changed

packages/types/src/codebase-index.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ export const CODEBASE_INDEX_DEFAULTS = {
2121
export const codebaseIndexConfigSchema = z.object({
2222
codebaseIndexEnabled: z.boolean().optional(),
2323
codebaseIndexQdrantUrl: z.string().optional(),
24-
codebaseIndexEmbedderProvider: z.enum(["openai", "ollama", "openai-compatible", "gemini"]).optional(),
24+
codebaseIndexVectorStoreType: z.enum(["qdrant", "local"]).optional(),
25+
codebaseIndexLocalVectorStorePath: z.string().optional(),
26+
codebaseIndexEmbedderProvider: z.enum(["openai", "ollama", "openai-compatible", "gemini", "fastembed"]).optional(),
2527
codebaseIndexEmbedderBaseUrl: z.string().optional(),
2628
codebaseIndexEmbedderModelId: z.string().optional(),
2729
codebaseIndexEmbedderModelDimension: z.number().optional(),
@@ -47,6 +49,7 @@ export const codebaseIndexModelsSchema = z.object({
4749
ollama: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
4850
"openai-compatible": z.record(z.string(), z.object({ dimension: z.number() })).optional(),
4951
gemini: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
52+
fastembed: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
5053
})
5154

5255
export type CodebaseIndexModels = z.infer<typeof codebaseIndexModelsSchema>

pnpm-lock.yaml

Lines changed: 3107 additions & 35 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,8 @@
392392
"@aws-sdk/credential-providers": "^3.806.0",
393393
"@google/genai": "^1.0.0",
394394
"@lmstudio/sdk": "^1.1.1",
395+
"@mastra/fastembed": "^0.10.1",
396+
"@mastra/libsql": "^0.11.0",
395397
"@mistralai/mistralai": "^1.3.6",
396398
"@modelcontextprotocol/sdk": "^1.9.0",
397399
"@qdrant/js-client-rest": "^1.14.0",

src/services/code-index/config-manager.ts

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ export class CodeIndexConfigManager {
2020
private geminiOptions?: { apiKey: string }
2121
private qdrantUrl?: string = "http://localhost:6333"
2222
private qdrantApiKey?: string
23+
private vectorStoreType?: "qdrant" | "local"
24+
private localVectorStorePath?: string
2325
private searchMinScore?: number
2426
private searchMaxResults?: number
2527

@@ -54,6 +56,8 @@ export class CodeIndexConfigManager {
5456
const {
5557
codebaseIndexEnabled,
5658
codebaseIndexQdrantUrl,
59+
codebaseIndexVectorStoreType,
60+
codebaseIndexLocalVectorStorePath,
5761
codebaseIndexEmbedderProvider,
5862
codebaseIndexEmbedderBaseUrl,
5963
codebaseIndexEmbedderModelId,
@@ -72,6 +76,8 @@ export class CodeIndexConfigManager {
7276
this.codebaseIndexEnabled = codebaseIndexEnabled ?? true
7377
this.qdrantUrl = codebaseIndexQdrantUrl
7478
this.qdrantApiKey = qdrantApiKey ?? ""
79+
this.vectorStoreType = codebaseIndexVectorStoreType as "qdrant" | "local" | undefined
80+
this.localVectorStorePath = codebaseIndexLocalVectorStorePath
7581
this.searchMinScore = codebaseIndexSearchMinScore
7682
this.searchMaxResults = codebaseIndexSearchMaxResults
7783

@@ -93,13 +99,15 @@ export class CodeIndexConfigManager {
9399

94100
this.openAiOptions = { openAiNativeApiKey: openAiKey }
95101

96-
// Set embedder provider with support for openai-compatible
102+
// Set embedder provider with support for openai-compatible and fastembed
97103
if (codebaseIndexEmbedderProvider === "ollama") {
98104
this.embedderProvider = "ollama"
99105
} else if (codebaseIndexEmbedderProvider === "openai-compatible") {
100106
this.embedderProvider = "openai-compatible"
101107
} else if (codebaseIndexEmbedderProvider === "gemini") {
102108
this.embedderProvider = "gemini"
109+
} else if (codebaseIndexEmbedderProvider === "fastembed") {
110+
this.embedderProvider = "fastembed"
103111
} else {
104112
this.embedderProvider = "openai"
105113
}
@@ -188,26 +196,28 @@ export class CodeIndexConfigManager {
188196
* Checks if the service is properly configured based on the embedder type.
189197
*/
190198
public isConfigured(): boolean {
199+
// Check if we have a vector store configured (either Qdrant or local)
200+
const hasVectorStore = this.qdrantUrl || this.vectorStoreType === "local"
201+
191202
if (this.embedderProvider === "openai") {
192203
const openAiKey = this.openAiOptions?.openAiNativeApiKey
193-
const qdrantUrl = this.qdrantUrl
194-
return !!(openAiKey && qdrantUrl)
204+
return !!(openAiKey && hasVectorStore)
195205
} else if (this.embedderProvider === "ollama") {
196206
// Ollama model ID has a default, so only base URL is strictly required for config
197207
const ollamaBaseUrl = this.ollamaOptions?.ollamaBaseUrl
198-
const qdrantUrl = this.qdrantUrl
199-
return !!(ollamaBaseUrl && qdrantUrl)
208+
return !!(ollamaBaseUrl && hasVectorStore)
200209
} else if (this.embedderProvider === "openai-compatible") {
201210
const baseUrl = this.openAiCompatibleOptions?.baseUrl
202211
const apiKey = this.openAiCompatibleOptions?.apiKey
203-
const qdrantUrl = this.qdrantUrl
204-
const isConfigured = !!(baseUrl && apiKey && qdrantUrl)
212+
const isConfigured = !!(baseUrl && apiKey && hasVectorStore)
205213
return isConfigured
206214
} else if (this.embedderProvider === "gemini") {
207215
const apiKey = this.geminiOptions?.apiKey
208-
const qdrantUrl = this.qdrantUrl
209-
const isConfigured = !!(apiKey && qdrantUrl)
216+
const isConfigured = !!(apiKey && hasVectorStore)
210217
return isConfigured
218+
} else if (this.embedderProvider === "fastembed") {
219+
// FastEmbed is local and doesn't require API keys, just a vector store
220+
return !!hasVectorStore
211221
}
212222
return false // Should not happen if embedderProvider is always set correctly
213223
}
@@ -353,6 +363,8 @@ export class CodeIndexConfigManager {
353363
geminiOptions: this.geminiOptions,
354364
qdrantUrl: this.qdrantUrl,
355365
qdrantApiKey: this.qdrantApiKey,
366+
vectorStoreType: this.vectorStoreType,
367+
localVectorStorePath: this.localVectorStorePath,
356368
searchMinScore: this.currentSearchMinScore,
357369
searchMaxResults: this.currentSearchMaxResults,
358370
}
src/services/code-index/embedders/__tests__/fastembed.spec.ts

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
// npx vitest services/code-index/embedders/__tests__/fastembed.spec.ts
2+
3+
import { describe, it, expect, beforeEach, vi } from "vitest"
4+
import { FastEmbedEmbedder } from "../fastembed"
5+
6+
// Mock TelemetryService
7+
vi.mock("@roo-code/telemetry", () => ({
8+
TelemetryService: {
9+
instance: {
10+
captureEvent: vi.fn(),
11+
},
12+
},
13+
}))
14+
15+
// Mock i18n
16+
vi.mock("../../../i18n", () => ({
17+
t: vi.fn((key: string, params?: any) => {
18+
if (key === "embeddings:fastembed.modelNotSupported") {
19+
return `Model "${params?.model}" not supported. Available models: ${params?.availableModels}`
20+
}
21+
if (key === "embeddings:fastembed.embeddingFailed") {
22+
return `Failed to create embeddings with FastEmbed: ${params?.message}`
23+
}
24+
if (key === "embeddings:fastembed.noValidTexts") {
25+
return "No valid texts to embed"
26+
}
27+
if (key === "embeddings:fastembed.invalidResponseFormat") {
28+
return "Invalid response format from FastEmbed"
29+
}
30+
if (key === "embeddings:fastembed.invalidEmbeddingFormat") {
31+
return "Invalid embedding format from FastEmbed"
32+
}
33+
return key
34+
}),
35+
}))
36+
37+
// Mock getModelQueryPrefix
38+
vi.mock("../../../shared/embeddingModels", () => ({
39+
getModelQueryPrefix: vi.fn(() => null),
40+
}))
41+
42+
// Mock @mastra/fastembed
43+
vi.mock("@mastra/fastembed", () => ({
44+
fastembed: {
45+
small: {
46+
doEmbed: vi.fn(),
47+
maxEmbeddingsPerCall: 256,
48+
},
49+
base: {
50+
doEmbed: vi.fn(),
51+
maxEmbeddingsPerCall: 256,
52+
},
53+
},
54+
}))
55+
56+
describe("FastEmbedEmbedder", () => {
57+
let embedder: FastEmbedEmbedder
58+
let mockSmallDoEmbed: any
59+
let mockBaseDoEmbed: any
60+
61+
beforeEach(() => {
62+
vi.clearAllMocks()
63+
64+
// Get references to the mocked functions
65+
const { fastembed } = require("@mastra/fastembed")
66+
mockSmallDoEmbed = fastembed.small.doEmbed
67+
mockBaseDoEmbed = fastembed.base.doEmbed
68+
})
69+
70+
describe("constructor", () => {
71+
it("should initialize with default model (bge-small-en-v1.5)", () => {
72+
embedder = new FastEmbedEmbedder({})
73+
expect(embedder.embedderInfo.name).toBe("fastembed")
74+
})
75+
76+
it("should initialize with specified model", () => {
77+
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
78+
expect(embedder.embedderInfo.name).toBe("fastembed")
79+
})
80+
81+
it("should use fallback model for unsupported model", () => {
82+
const consoleSpy = vi.spyOn(console, "warn").mockImplementation(() => {})
83+
84+
embedder = new FastEmbedEmbedder({ fastEmbedModel: "unsupported-model" })
85+
86+
expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Model "unsupported-model" not available'))
87+
88+
consoleSpy.mockRestore()
89+
})
90+
})
91+
92+
describe("createEmbeddings", () => {
93+
beforeEach(() => {
94+
embedder = new FastEmbedEmbedder({})
95+
})
96+
97+
it("should create embeddings for single text using small model", async () => {
98+
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
99+
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)
100+
101+
const result = await embedder.createEmbeddings(["test text"])
102+
103+
expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: ["test text"] })
104+
expect(result).toEqual({
105+
embeddings: mockEmbeddings,
106+
})
107+
})
108+
109+
it("should create embeddings for multiple texts using small model", async () => {
110+
const mockEmbeddings = [
111+
[0.1, 0.2, 0.3, 0.4],
112+
[0.5, 0.6, 0.7, 0.8],
113+
]
114+
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)
115+
116+
const result = await embedder.createEmbeddings(["text 1", "text 2"])
117+
118+
expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: ["text 1", "text 2"] })
119+
expect(result).toEqual({
120+
embeddings: mockEmbeddings,
121+
})
122+
})
123+
124+
it("should create embeddings using base model when specified", async () => {
125+
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
126+
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
127+
mockBaseDoEmbed.mockResolvedValue(mockEmbeddings)
128+
129+
const result = await embedder.createEmbeddings(["test text"])
130+
131+
expect(mockBaseDoEmbed).toHaveBeenCalledWith({ values: ["test text"] })
132+
expect(result).toEqual({
133+
embeddings: mockEmbeddings,
134+
})
135+
})
136+
137+
it("should handle empty input", async () => {
138+
const result = await embedder.createEmbeddings([])
139+
140+
expect(mockSmallDoEmbed).not.toHaveBeenCalled()
141+
expect(result).toEqual({
142+
embeddings: [],
143+
})
144+
})
145+
146+
it("should handle FastEmbed API errors", async () => {
147+
const error = new Error("FastEmbed API error")
148+
mockSmallDoEmbed.mockRejectedValue(error)
149+
150+
await expect(embedder.createEmbeddings(["test text"])).rejects.toThrow(
151+
"Failed to create embeddings with FastEmbed: FastEmbed API error",
152+
)
153+
})
154+
155+
it("should process large batches correctly", async () => {
156+
const texts = Array.from({ length: 150 }, (_, i) => `text ${i}`)
157+
const mockEmbeddings = texts.map((_, i) => [i * 0.1, i * 0.2, i * 0.3, i * 0.4])
158+
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)
159+
160+
const result = await embedder.createEmbeddings(texts)
161+
162+
expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: texts })
163+
expect(result.embeddings).toHaveLength(150)
164+
})
165+
})
166+
167+
describe("validateConfiguration", () => {
168+
beforeEach(() => {
169+
embedder = new FastEmbedEmbedder({})
170+
})
171+
172+
it("should validate successfully with small model", async () => {
173+
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
174+
mockSmallDoEmbed.mockResolvedValue(mockEmbeddings)
175+
176+
const result = await embedder.validateConfiguration()
177+
178+
expect(mockSmallDoEmbed).toHaveBeenCalledWith({ values: ["test"] })
179+
expect(result).toEqual({ valid: true })
180+
})
181+
182+
it("should validate successfully with base model", async () => {
183+
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
184+
const mockEmbeddings = [[0.1, 0.2, 0.3, 0.4]]
185+
mockBaseDoEmbed.mockResolvedValue(mockEmbeddings)
186+
187+
const result = await embedder.validateConfiguration()
188+
189+
expect(mockBaseDoEmbed).toHaveBeenCalledWith({ values: ["test"] })
190+
expect(result).toEqual({ valid: true })
191+
})
192+
193+
it("should return invalid when FastEmbed fails", async () => {
194+
const error = new Error("FastEmbed validation error")
195+
mockSmallDoEmbed.mockRejectedValue(error)
196+
197+
const result = await embedder.validateConfiguration()
198+
199+
expect(result).toEqual({
200+
valid: false,
201+
error: "FastEmbed validation failed: FastEmbed validation error",
202+
})
203+
})
204+
205+
it("should handle unexpected validation errors", async () => {
206+
mockSmallDoEmbed.mockRejectedValue("Unexpected error")
207+
208+
const result = await embedder.validateConfiguration()
209+
210+
expect(result).toEqual({
211+
valid: false,
212+
error: "FastEmbed validation failed: Unexpected error",
213+
})
214+
})
215+
})
216+
217+
describe("embedderInfo", () => {
218+
it("should return correct embedder info", () => {
219+
embedder = new FastEmbedEmbedder({})
220+
expect(embedder.embedderInfo).toEqual({
221+
name: "fastembed",
222+
})
223+
})
224+
})
225+
226+
describe("model selection", () => {
227+
it("should use small model by default", () => {
228+
embedder = new FastEmbedEmbedder({})
229+
// We can't directly test the private property, but we can test the behavior
230+
expect(() => embedder).not.toThrow()
231+
})
232+
233+
it("should use base model when specified", () => {
234+
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-base-en-v1.5" })
235+
expect(() => embedder).not.toThrow()
236+
})
237+
238+
it("should use small model when explicitly specified", () => {
239+
embedder = new FastEmbedEmbedder({ fastEmbedModel: "bge-small-en-v1.5" })
240+
expect(() => embedder).not.toThrow()
241+
})
242+
})
243+
})

0 commit comments

Comments
 (0)