Skip to content

Commit 5cab585

Browse files
authored
fix: prioritize built-in model dimensions over custom dimensions (#5705)
1 parent 0c014f0 commit 5cab585

File tree

4 files changed

+228
-11
lines changed

4 files changed

+228
-11
lines changed

src/services/code-index/__tests__/config-manager.spec.ts

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ import { PreviousConfigSnapshot } from "../interfaces/config"
88
// Mock ContextProxy
99
vi.mock("../../../core/config/ContextProxy")
1010

11+
// Mock embeddingModels module
12+
vi.mock("../../../shared/embeddingModels")
13+
14+
// Import mocked functions
15+
import { getDefaultModelId, getModelDimension, getModelScoreThreshold } from "../../../shared/embeddingModels"
16+
17+
// Type the mocked functions
18+
const mockedGetDefaultModelId = vi.mocked(getDefaultModelId)
19+
const mockedGetModelDimension = vi.mocked(getModelDimension)
20+
const mockedGetModelScoreThreshold = vi.mocked(getModelScoreThreshold)
21+
1122
describe("CodeIndexConfigManager", () => {
1223
let mockContextProxy: any
1324
let configManager: CodeIndexConfigManager
@@ -339,6 +350,14 @@ describe("CodeIndexConfigManager", () => {
339350
})
340351

341352
it("should NOT require restart when models have same dimensions", async () => {
353+
// Mock both models to have same dimension
354+
mockedGetModelDimension.mockImplementation((provider, modelId) => {
355+
if (modelId === "text-embedding-3-small" || modelId === "text-embedding-ada-002") {
356+
return 1536
357+
}
358+
return undefined
359+
})
360+
342361
// Initial state with text-embedding-3-small (1536D)
343362
mockContextProxy.getGlobalState.mockReturnValue({
344363
codebaseIndexEnabled: true,
@@ -794,6 +813,14 @@ describe("CodeIndexConfigManager", () => {
794813
})
795814

796815
it("should fall back to model-specific threshold when user setting is undefined", async () => {
816+
// Mock the model score threshold
817+
mockedGetModelScoreThreshold.mockImplementation((provider, modelId) => {
818+
if (provider === "ollama" && modelId === "nomic-embed-code") {
819+
return 0.15
820+
}
821+
return undefined
822+
})
823+
797824
mockContextProxy.getGlobalState.mockReturnValue({
798825
codebaseIndexEnabled: true,
799826
codebaseIndexQdrantUrl: "http://qdrant.local",
@@ -840,6 +867,14 @@ describe("CodeIndexConfigManager", () => {
840867
})
841868

842869
it("should use model-specific threshold with openai-compatible provider", async () => {
870+
// Mock the model score threshold
871+
mockedGetModelScoreThreshold.mockImplementation((provider, modelId) => {
872+
if (provider === "openai-compatible" && modelId === "nomic-embed-code") {
873+
return 0.15
874+
}
875+
return undefined
876+
})
877+
843878
mockContextProxy.getGlobalState.mockImplementation((key: string) => {
844879
if (key === "codebaseIndexConfig") {
845880
return {
@@ -882,6 +917,14 @@ describe("CodeIndexConfigManager", () => {
882917
})
883918

884919
it("should handle priority correctly: user > model > default", async () => {
920+
// Mock the model score threshold
921+
mockedGetModelScoreThreshold.mockImplementation((provider, modelId) => {
922+
if (provider === "ollama" && modelId === "nomic-embed-code") {
923+
return 0.15
924+
}
925+
return undefined
926+
})
927+
885928
// Test 1: User setting takes precedence
886929
mockContextProxy.getGlobalState.mockReturnValue({
887930
codebaseIndexEnabled: true,
@@ -1501,6 +1544,13 @@ describe("CodeIndexConfigManager", () => {
15011544
})
15021545

15031546
describe("loadConfiguration", () => {
1547+
beforeEach(() => {
1548+
// Set default mock behaviors
1549+
mockedGetDefaultModelId.mockReturnValue("text-embedding-3-small")
1550+
mockedGetModelDimension.mockReturnValue(undefined)
1551+
mockedGetModelScoreThreshold.mockReturnValue(undefined)
1552+
})
1553+
15041554
it("should load configuration and return proper structure", async () => {
15051555
const mockConfigValues = {
15061556
codebaseIndexEnabled: true,
@@ -1634,5 +1684,131 @@ describe("CodeIndexConfigManager", () => {
16341684
configManager = new CodeIndexConfigManager(mockContextProxy)
16351685
expect(configManager.isConfigured()).toBe(false)
16361686
})
1687+
1688+
describe("currentModelDimension", () => {
1689+
beforeEach(() => {
1690+
vi.clearAllMocks()
1691+
})
1692+
1693+
it("should return model's built-in dimension when available", async () => {
1694+
// Mock getModelDimension to return a built-in dimension
1695+
mockedGetModelDimension.mockReturnValue(1536)
1696+
1697+
mockContextProxy.getGlobalState.mockReturnValue({
1698+
codebaseIndexEnabled: true,
1699+
codebaseIndexEmbedderProvider: "openai",
1700+
codebaseIndexEmbedderModelId: "text-embedding-3-small",
1701+
codebaseIndexEmbedderModelDimension: 2048, // Custom dimension should be ignored
1702+
codebaseIndexQdrantUrl: "http://localhost:6333",
1703+
})
1704+
mockContextProxy.getSecret.mockImplementation((key: string) => {
1705+
if (key === "codeIndexOpenAiKey") return "test-key"
1706+
return undefined
1707+
})
1708+
1709+
configManager = new CodeIndexConfigManager(mockContextProxy)
1710+
await configManager.loadConfiguration()
1711+
1712+
// Should return model's built-in dimension, not custom
1713+
expect(configManager.currentModelDimension).toBe(1536)
1714+
expect(mockedGetModelDimension).toHaveBeenCalledWith("openai", "text-embedding-3-small")
1715+
})
1716+
1717+
it("should use custom dimension only when model has no built-in dimension", async () => {
1718+
// Mock getModelDimension to return undefined (no built-in dimension)
1719+
mockedGetModelDimension.mockReturnValue(undefined)
1720+
1721+
mockContextProxy.getGlobalState.mockReturnValue({
1722+
codebaseIndexEnabled: true,
1723+
codebaseIndexEmbedderProvider: "openai-compatible",
1724+
codebaseIndexEmbedderModelId: "custom-model",
1725+
codebaseIndexEmbedderModelDimension: 2048, // Custom dimension should be used
1726+
codebaseIndexQdrantUrl: "http://localhost:6333",
1727+
})
1728+
mockContextProxy.getSecret.mockImplementation((key: string) => {
1729+
if (key === "codebaseIndexOpenAiCompatibleApiKey") return "test-key"
1730+
return undefined
1731+
})
1732+
1733+
configManager = new CodeIndexConfigManager(mockContextProxy)
1734+
await configManager.loadConfiguration()
1735+
1736+
// Should use custom dimension as fallback
1737+
expect(configManager.currentModelDimension).toBe(2048)
1738+
expect(mockedGetModelDimension).toHaveBeenCalledWith("openai-compatible", "custom-model")
1739+
})
1740+
1741+
it("should return undefined when neither model dimension nor custom dimension is available", async () => {
1742+
// Mock getModelDimension to return undefined
1743+
mockedGetModelDimension.mockReturnValue(undefined)
1744+
1745+
mockContextProxy.getGlobalState.mockReturnValue({
1746+
codebaseIndexEnabled: true,
1747+
codebaseIndexEmbedderProvider: "openai-compatible",
1748+
codebaseIndexEmbedderModelId: "unknown-model",
1749+
// No custom dimension set
1750+
codebaseIndexQdrantUrl: "http://localhost:6333",
1751+
})
1752+
mockContextProxy.getSecret.mockImplementation((key: string) => {
1753+
if (key === "codebaseIndexOpenAiCompatibleApiKey") return "test-key"
1754+
return undefined
1755+
})
1756+
1757+
configManager = new CodeIndexConfigManager(mockContextProxy)
1758+
await configManager.loadConfiguration()
1759+
1760+
// Should return undefined
1761+
expect(configManager.currentModelDimension).toBe(undefined)
1762+
expect(mockedGetModelDimension).toHaveBeenCalledWith("openai-compatible", "unknown-model")
1763+
})
1764+
1765+
it("should use default model ID when modelId is not specified", async () => {
1766+
// Mock getDefaultModelId and getModelDimension
1767+
mockedGetDefaultModelId.mockReturnValue("text-embedding-3-small")
1768+
mockedGetModelDimension.mockReturnValue(1536)
1769+
1770+
mockContextProxy.getGlobalState.mockReturnValue({
1771+
codebaseIndexEnabled: true,
1772+
codebaseIndexEmbedderProvider: "openai",
1773+
// No modelId specified
1774+
codebaseIndexQdrantUrl: "http://localhost:6333",
1775+
})
1776+
mockContextProxy.getSecret.mockImplementation((key: string) => {
1777+
if (key === "codeIndexOpenAiKey") return "test-key"
1778+
return undefined
1779+
})
1780+
1781+
configManager = new CodeIndexConfigManager(mockContextProxy)
1782+
await configManager.loadConfiguration()
1783+
1784+
// Should use default model ID
1785+
expect(configManager.currentModelDimension).toBe(1536)
1786+
expect(mockedGetDefaultModelId).toHaveBeenCalledWith("openai")
1787+
expect(mockedGetModelDimension).toHaveBeenCalledWith("openai", "text-embedding-3-small")
1788+
})
1789+
1790+
it("should ignore invalid custom dimension (0 or negative)", async () => {
1791+
// Mock getModelDimension to return undefined
1792+
mockedGetModelDimension.mockReturnValue(undefined)
1793+
1794+
mockContextProxy.getGlobalState.mockReturnValue({
1795+
codebaseIndexEnabled: true,
1796+
codebaseIndexEmbedderProvider: "openai-compatible",
1797+
codebaseIndexEmbedderModelId: "custom-model",
1798+
codebaseIndexEmbedderModelDimension: 0, // Invalid dimension
1799+
codebaseIndexQdrantUrl: "http://localhost:6333",
1800+
})
1801+
mockContextProxy.getSecret.mockImplementation((key: string) => {
1802+
if (key === "codebaseIndexOpenAiCompatibleApiKey") return "test-key"
1803+
return undefined
1804+
})
1805+
1806+
configManager = new CodeIndexConfigManager(mockContextProxy)
1807+
await configManager.loadConfiguration()
1808+
1809+
// Should return undefined since custom dimension is invalid
1810+
expect(configManager.currentModelDimension).toBe(undefined)
1811+
})
1812+
})
16371813
})
16381814
})

src/services/code-index/__tests__/service-factory.spec.ts

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -420,10 +420,42 @@ describe("CodeIndexServiceFactory", () => {
420420
)
421421
})
422422

423-
it("should prioritize manual modelDimension over getModelDimension for OpenAI Compatible provider", () => {
423+
it("should prioritize getModelDimension over manual modelDimension for OpenAI Compatible provider", () => {
424424
// Arrange
425425
const testModelId = "custom-model"
426426
const manualDimension = 1024
427+
const modelDimension = 768
428+
const testConfig = {
429+
embedderProvider: "openai-compatible",
430+
modelId: testModelId,
431+
modelDimension: manualDimension, // This should be ignored when model has built-in dimension
432+
openAiCompatibleOptions: {
433+
baseUrl: "https://api.example.com/v1",
434+
apiKey: "test-api-key",
435+
},
436+
qdrantUrl: "http://localhost:6333",
437+
qdrantApiKey: "test-key",
438+
}
439+
mockConfigManager.getConfig.mockReturnValue(testConfig as any)
440+
mockGetModelDimension.mockReturnValue(modelDimension) // This should be used
441+
442+
// Act
443+
factory.createVectorStore()
444+
445+
// Assert
446+
expect(mockGetModelDimension).toHaveBeenCalledWith("openai-compatible", testModelId)
447+
expect(MockedQdrantVectorStore).toHaveBeenCalledWith(
448+
"/test/workspace",
449+
"http://localhost:6333",
450+
modelDimension, // Should use model's built-in dimension, not manual
451+
"test-key",
452+
)
453+
})
454+
455+
it("should use manual modelDimension only when model has no built-in dimension", () => {
456+
// Arrange
457+
const testModelId = "unknown-model"
458+
const manualDimension = 1024
427459
const testConfig = {
428460
embedderProvider: "openai-compatible",
429461
modelId: testModelId,
@@ -436,17 +468,17 @@ describe("CodeIndexServiceFactory", () => {
436468
qdrantApiKey: "test-key",
437469
}
438470
mockConfigManager.getConfig.mockReturnValue(testConfig as any)
439-
mockGetModelDimension.mockReturnValue(768) // This should be ignored
471+
mockGetModelDimension.mockReturnValue(undefined) // Model has no built-in dimension
440472

441473
// Act
442474
factory.createVectorStore()
443475

444476
// Assert
445-
expect(mockGetModelDimension).not.toHaveBeenCalled()
477+
expect(mockGetModelDimension).toHaveBeenCalledWith("openai-compatible", testModelId)
446478
expect(MockedQdrantVectorStore).toHaveBeenCalledWith(
447479
"/test/workspace",
448480
"http://localhost:6333",
449-
manualDimension,
481+
manualDimension, // Should use manual dimension as fallback
450482
"test-key",
451483
)
452484
})

src/services/code-index/config-manager.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,10 +398,19 @@ export class CodeIndexConfigManager {
398398

399399
/**
400400
* Gets the current model dimension being used for embeddings.
401-
* Returns the explicitly configured dimension or undefined if not set.
401+
* Returns the model's built-in dimension if available, otherwise falls back to custom dimension.
402402
*/
403403
public get currentModelDimension(): number | undefined {
404-
return this.modelDimension
404+
// First try to get the model-specific dimension
405+
const modelId = this.modelId ?? getDefaultModelId(this.embedderProvider)
406+
const modelDimension = getModelDimension(this.embedderProvider, modelId)
407+
408+
// Only use custom dimension if model doesn't have a built-in dimension
409+
if (!modelDimension && this.modelDimension && this.modelDimension > 0) {
410+
return this.modelDimension
411+
}
412+
413+
return modelDimension
405414
}
406415

407416
/**

src/services/code-index/service-factory.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,12 @@ export class CodeIndexServiceFactory {
108108

109109
let vectorSize: number | undefined
110110

111-
// First check if a manual dimension is provided (works for all providers)
112-
if (config.modelDimension && config.modelDimension > 0) {
111+
// First try to get the model-specific dimension from profiles
112+
vectorSize = getModelDimension(provider, modelId)
113+
114+
// Only use manual dimension if model doesn't have a built-in dimension
115+
if (!vectorSize && config.modelDimension && config.modelDimension > 0) {
113116
vectorSize = config.modelDimension
114-
} else {
115-
// Fall back to model-specific dimension from profiles
116-
vectorSize = getModelDimension(provider, modelId)
117117
}
118118

119119
if (vectorSize === undefined || vectorSize <= 0) {

0 commit comments

Comments
 (0)