-
Notifications
You must be signed in to change notification settings - Fork 2.6k
fix: prioritize user-configured embedding dimension over model default #8106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1688,15 +1688,15 @@ describe("CodeIndexConfigManager", () => { | |
| vi.clearAllMocks() | ||
| }) | ||
|
|
||
| it("should return model's built-in dimension when available", async () => { | ||
| it("should prioritize user-configured dimension over model's built-in dimension", async () => { | ||
| // Mock getModelDimension to return a built-in dimension | ||
| mockedGetModelDimension.mockReturnValue(1536) | ||
|
|
||
| mockContextProxy.getGlobalState.mockReturnValue({ | ||
| codebaseIndexEnabled: true, | ||
| codebaseIndexEmbedderProvider: "openai", | ||
| codebaseIndexEmbedderModelId: "text-embedding-3-small", | ||
| codebaseIndexEmbedderModelDimension: 2048, // Custom dimension should be ignored | ||
| codebaseIndexEmbedderModelDimension: 2048, // Custom dimension should take priority | ||
| codebaseIndexQdrantUrl: "http://localhost:6333", | ||
| }) | ||
| mockContextProxy.getSecret.mockImplementation((key: string) => { | ||
|
|
@@ -1707,20 +1707,46 @@ describe("CodeIndexConfigManager", () => { | |
| configManager = new CodeIndexConfigManager(mockContextProxy) | ||
| await configManager.loadConfiguration() | ||
|
|
||
| // Should return model's built-in dimension, not custom | ||
| // Should return user-configured dimension, not model's built-in | ||
| expect(configManager.currentModelDimension).toBe(2048) | ||
| // getModelDimension should not be called when custom dimension is set | ||
| expect(mockedGetModelDimension).not.toHaveBeenCalled() | ||
| }) | ||
|
|
||
| it("should fall back to model's built-in dimension when no custom dimension is set", async () => { | ||
| // Mock getModelDimension to return a built-in dimension | ||
| mockedGetModelDimension.mockReturnValue(1536) | ||
|
|
||
| mockContextProxy.getGlobalState.mockReturnValue({ | ||
| codebaseIndexEnabled: true, | ||
| codebaseIndexEmbedderProvider: "openai", | ||
| codebaseIndexEmbedderModelId: "text-embedding-3-small", | ||
| // No custom dimension set | ||
| codebaseIndexQdrantUrl: "http://localhost:6333", | ||
| }) | ||
| mockContextProxy.getSecret.mockImplementation((key: string) => { | ||
| if (key === "codeIndexOpenAiKey") return "test-key" | ||
| return undefined | ||
| }) | ||
|
|
||
| configManager = new CodeIndexConfigManager(mockContextProxy) | ||
| await configManager.loadConfiguration() | ||
|
|
||
| // Should fall back to model's built-in dimension | ||
| expect(configManager.currentModelDimension).toBe(1536) | ||
| expect(mockedGetModelDimension).toHaveBeenCalledWith("openai", "text-embedding-3-small") | ||
| }) | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great addition! This test specifically addresses the issue #8102 scenario where user sets 1536 but model default is 1024. This ensures the bug won't regress. |
||
| it("should use custom dimension only when model has no built-in dimension", async () => { | ||
| // Mock getModelDimension to return undefined (no built-in dimension) | ||
| mockedGetModelDimension.mockReturnValue(undefined) | ||
| it("should use user-configured dimension (1536) even when model default is different (1024)", async () => { | ||
| // This test specifically addresses the issue reported in #8102 | ||
| // Mock getModelDimension to return 1024 (model default) | ||
| mockedGetModelDimension.mockReturnValue(1024) | ||
|
|
||
| mockContextProxy.getGlobalState.mockReturnValue({ | ||
| codebaseIndexEnabled: true, | ||
| codebaseIndexEmbedderProvider: "openai-compatible", | ||
| codebaseIndexEmbedderModelId: "custom-model", | ||
| codebaseIndexEmbedderModelDimension: 2048, // Custom dimension should be used | ||
| codebaseIndexEmbedderModelId: "some-model-with-1024-default", | ||
| codebaseIndexEmbedderModelDimension: 1536, // User explicitly sets 1536 | ||
| codebaseIndexQdrantUrl: "http://localhost:6333", | ||
| }) | ||
| mockContextProxy.getSecret.mockImplementation((key: string) => { | ||
|
|
@@ -1731,9 +1757,10 @@ describe("CodeIndexConfigManager", () => { | |
| configManager = new CodeIndexConfigManager(mockContextProxy) | ||
| await configManager.loadConfiguration() | ||
|
|
||
| // Should use custom dimension as fallback | ||
| expect(configManager.currentModelDimension).toBe(2048) | ||
| expect(mockedGetModelDimension).toHaveBeenCalledWith("openai-compatible", "custom-model") | ||
| // Should use user-configured dimension (1536), not model default (1024) | ||
| expect(configManager.currentModelDimension).toBe(1536) | ||
| // getModelDimension should not be called when custom dimension is set | ||
| expect(mockedGetModelDimension).not.toHaveBeenCalled() | ||
| }) | ||
|
|
||
| it("should return undefined when neither model dimension nor custom dimension is available", async () => { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -442,19 +442,17 @@ export class CodeIndexConfigManager { | |
|
|
||
| /** | ||
| * Gets the current model dimension being used for embeddings. | ||
| * Returns the model's built-in dimension if available, otherwise falls back to custom dimension. | ||
| * Returns the user-configured custom dimension if set, otherwise falls back to model's built-in dimension. | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice fix! The logic correctly prioritizes user-configured dimensions now. Though I wonder if the JSDoc comment could be even clearer by explicitly stating "User-configured dimensions override any model defaults" to make the priority crystal clear? |
||
| */ | ||
| public get currentModelDimension(): number | undefined { | ||
| // First try to get the model-specific dimension | ||
| const modelId = this.modelId ?? getDefaultModelId(this.embedderProvider) | ||
| const modelDimension = getModelDimension(this.embedderProvider, modelId) | ||
|
|
||
| // Only use custom dimension if model doesn't have a built-in dimension | ||
| if (!modelDimension && this.modelDimension && this.modelDimension > 0) { | ||
| // Prioritize user-configured custom dimension if explicitly set | ||
| if (this.modelDimension && this.modelDimension > 0) { | ||
| return this.modelDimension | ||
| } | ||
|
|
||
| return modelDimension | ||
| // Fall back to model-specific dimension from profiles | ||
| const modelId = this.modelId ?? getDefaultModelId(this.embedderProvider) | ||
| return getModelDimension(this.embedderProvider, modelId) | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -123,12 +123,12 @@ export class CodeIndexServiceFactory { | |
|
|
||
| let vectorSize: number | undefined | ||
|
|
||
| // First try to get the model-specific dimension from profiles | ||
| vectorSize = getModelDimension(provider, modelId) | ||
|
|
||
| // Only use manual dimension if model doesn't have a built-in dimension | ||
| if (!vectorSize && config.modelDimension && config.modelDimension > 0) { | ||
| // Prioritize user-configured custom dimension if explicitly set | ||
| if (config.modelDimension && config.modelDimension > 0) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good consistency with the config-manager implementation. The if-else structure makes the priority clear. |
||
| vectorSize = config.modelDimension | ||
| } else { | ||
| // Fall back to model-specific dimension from profiles | ||
| vectorSize = getModelDimension(provider, modelId) | ||
| } | ||
|
|
||
| if (vectorSize === undefined || vectorSize <= 0) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Excellent test coverage! This test name could be even more descriptive though. How about "should always use user-configured dimension when set, ignoring model defaults" to really emphasize the override behavior?