1 change: 1 addition & 0 deletions packages/types/src/codebase-index.ts
@@ -10,6 +10,7 @@ export const codebaseIndexConfigSchema = z.object({
codebaseIndexEmbedderProvider: z.enum(["openai", "ollama", "openai-compatible"]).optional(),
codebaseIndexEmbedderBaseUrl: z.string().optional(),
codebaseIndexEmbedderModelId: z.string().optional(),
codebaseIndexSearchMinScore: z.number().min(0).max(1).optional(),
})

export type CodebaseIndexConfig = z.infer<typeof codebaseIndexConfigSchema>
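
The new field is constrained to the [0, 1] range by the schema itself rather than by each consumer. A quick sketch of the validation behavior, assuming zod's standard safeParse:

codebaseIndexConfigSchema.safeParse({ codebaseIndexSearchMinScore: 0.8 }).success // true
codebaseIndexConfigSchema.safeParse({ codebaseIndexSearchMinScore: 1.5 }).success // false: fails max(1)
codebaseIndexConfigSchema.safeParse({}).success // true: the field is optional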
147 changes: 147 additions & 0 deletions src/services/code-index/__tests__/config-manager.spec.ts
@@ -709,6 +709,153 @@ describe("CodeIndexConfigManager", () => {
const result = await configManager.loadConfiguration()
expect(result.requiresRestart).toBe(false)
})

describe("currentSearchMinScore priority system", () => {
it("should return user-configured score when set", async () => {
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "openai",
codebaseIndexEmbedderModelId: "text-embedding-3-small",
codebaseIndexSearchMinScore: 0.8, // User setting
})
mockContextProxy.getSecret.mockImplementation((key: string) => {
if (key === "codeIndexOpenAiKey") return "test-key"
return undefined
})

await configManager.loadConfiguration()
expect(configManager.currentSearchMinScore).toBe(0.8)
})

it("should fall back to model-specific threshold when user setting is undefined", async () => {
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "ollama",
codebaseIndexEmbedderModelId: "nomic-embed-code",
// No codebaseIndexSearchMinScore - user hasn't configured it
})

await configManager.loadConfiguration()
// nomic-embed-code has a specific threshold of 0.15
expect(configManager.currentSearchMinScore).toBe(0.15)
})

it("should fall back to default SEARCH_MIN_SCORE when neither user setting nor model threshold exists", async () => {
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "openai",
codebaseIndexEmbedderModelId: "unknown-model", // Model not in profiles
// No codebaseIndexSearchMinScore
})
mockContextProxy.getSecret.mockImplementation((key: string) => {
if (key === "codeIndexOpenAiKey") return "test-key"
return undefined
})

await configManager.loadConfiguration()
// Should fall back to default SEARCH_MIN_SCORE (0.4)
expect(configManager.currentSearchMinScore).toBe(0.4)
})

it("should respect user setting of 0 (edge case)", async () => {
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "ollama",
codebaseIndexEmbedderModelId: "nomic-embed-code",
codebaseIndexSearchMinScore: 0, // User explicitly sets 0
})

await configManager.loadConfiguration()
// Should return 0, not fall back to model threshold (0.15)
expect(configManager.currentSearchMinScore).toBe(0)
})

it("should use model-specific threshold with openai-compatible provider", async () => {
mockContextProxy.getGlobalState.mockImplementation((key: string) => {
if (key === "codebaseIndexConfig") {
return {
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "openai-compatible",
codebaseIndexEmbedderModelId: "nomic-embed-code",
// No codebaseIndexSearchMinScore
}
}
if (key === "codebaseIndexOpenAiCompatibleBaseUrl") return "https://api.example.com/v1"
return undefined
})
mockContextProxy.getSecret.mockImplementation((key: string) => {
if (key === "codebaseIndexOpenAiCompatibleApiKey") return "test-api-key"
return undefined
})

await configManager.loadConfiguration()
// openai-compatible provider also has nomic-embed-code with 0.15 threshold
expect(configManager.currentSearchMinScore).toBe(0.15)
})

it("should use default model ID when modelId is not specified", async () => {
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "openai",
// No modelId specified
// No codebaseIndexSearchMinScore
})
mockContextProxy.getSecret.mockImplementation((key: string) => {
if (key === "codeIndexOpenAiKey") return "test-key"
return undefined
})

await configManager.loadConfiguration()
// Should use default model (text-embedding-3-small) threshold (0.4)
expect(configManager.currentSearchMinScore).toBe(0.4)
})

it("should handle priority correctly: user > model > default", async () => {
// Test 1: User setting takes precedence
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "ollama",
codebaseIndexEmbedderModelId: "nomic-embed-code", // Has 0.15 threshold
codebaseIndexSearchMinScore: 0.9, // User overrides
})

await configManager.loadConfiguration()
expect(configManager.currentSearchMinScore).toBe(0.9) // User setting wins

// Test 2: Model threshold when no user setting
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "ollama",
codebaseIndexEmbedderModelId: "nomic-embed-code",
// No user setting
})

const newManager = new CodeIndexConfigManager(mockContextProxy)
await newManager.loadConfiguration()
expect(newManager.currentSearchMinScore).toBe(0.15) // Model threshold

// Test 3: Default when neither exists
mockContextProxy.getGlobalState.mockReturnValue({
codebaseIndexEnabled: true,
codebaseIndexQdrantUrl: "http://qdrant.local",
codebaseIndexEmbedderProvider: "openai",
codebaseIndexEmbedderModelId: "custom-unknown-model",
// No user setting, unknown model
})

const anotherManager = new CodeIndexConfigManager(mockContextProxy)
await anotherManager.loadConfiguration()
expect(anotherManager.currentSearchMinScore).toBe(0.4) // Default
})
})
})

describe("empty/missing API key handling", () => {
26 changes: 18 additions & 8 deletions src/services/code-index/config-manager.ts
@@ -3,7 +3,7 @@ import { ContextProxy } from "../../core/config/ContextProxy"
import { EmbedderProvider } from "./interfaces/manager"
import { CodeIndexConfig, PreviousConfigSnapshot } from "./interfaces/config"
import { SEARCH_MIN_SCORE } from "./constants"
import { getDefaultModelId, getModelDimension } from "../../shared/embeddingModels"
import { getDefaultModelId, getModelDimension, getModelScoreThreshold } from "../../shared/embeddingModels"

/**
* Manages configuration state and validation for the code indexing feature.
@@ -34,10 +34,10 @@ export class CodeIndexConfigManager {
const codebaseIndexConfig = this.contextProxy?.getGlobalState("codebaseIndexConfig") ?? {
codebaseIndexEnabled: false,
codebaseIndexQdrantUrl: "http://localhost:6333",
codebaseIndexSearchMinScore: 0.4,
codebaseIndexEmbedderProvider: "openai",
codebaseIndexEmbedderBaseUrl: "",
codebaseIndexEmbedderModelId: "",
codebaseIndexSearchMinScore: undefined,
}

const {
@@ -46,6 +46,7 @@
codebaseIndexEmbedderProvider,
codebaseIndexEmbedderBaseUrl,
codebaseIndexEmbedderModelId,
codebaseIndexSearchMinScore,
} = codebaseIndexConfig

const openAiKey = this.contextProxy?.getSecret("codeIndexOpenAiKey") ?? ""
@@ -60,8 +61,8 @@
this.isEnabled = codebaseIndexEnabled || false
this.qdrantUrl = codebaseIndexQdrantUrl
this.qdrantApiKey = qdrantApiKey ?? ""
this.searchMinScore = codebaseIndexSearchMinScore
this.openAiOptions = { openAiNativeApiKey: openAiKey }
this.searchMinScore = SEARCH_MIN_SCORE

// Set embedder provider with support for openai-compatible
if (codebaseIndexEmbedderProvider === "ollama") {
@@ -139,7 +140,7 @@
openAiCompatibleOptions: this.openAiCompatibleOptions,
qdrantUrl: this.qdrantUrl,
qdrantApiKey: this.qdrantApiKey,
searchMinScore: this.searchMinScore,
searchMinScore: this.currentSearchMinScore,
},
requiresRestart,
}
@@ -294,7 +295,7 @@
openAiCompatibleOptions: this.openAiCompatibleOptions,
qdrantUrl: this.qdrantUrl,
qdrantApiKey: this.qdrantApiKey,
searchMinScore: this.searchMinScore,
searchMinScore: this.currentSearchMinScore,
}
}

@@ -337,9 +338,18 @@
}

/**
* Gets the configured minimum search score.
* Gets the configured minimum search score based on user setting, model-specific threshold, or fallback.
* Priority: 1) User setting, 2) Model-specific threshold, 3) Default SEARCH_MIN_SCORE constant.
*/
public get currentSearchMinScore(): number | undefined {
return this.searchMinScore
public get currentSearchMinScore(): number {
// First check if user has configured a custom score threshold
if (this.searchMinScore !== undefined) {
return this.searchMinScore
}

// Fall back to model-specific threshold
const currentModelId = this.modelId ?? getDefaultModelId(this.embedderProvider)
const modelSpecificThreshold = getModelScoreThreshold(this.embedderProvider, currentModelId)
return modelSpecificThreshold ?? SEARCH_MIN_SCORE
}
}
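
The getter above is the heart of this change. A minimal standalone sketch of the same three-tier resolution, assuming the helpers from src/shared/embeddingModels.ts and the 0.4 value the tests above assert for the SEARCH_MIN_SCORE constant:

import { getDefaultModelId, getModelScoreThreshold, type EmbedderProvider } from "../../shared/embeddingModels"

const SEARCH_MIN_SCORE = 0.4 // assumed default from ./constants

// Priority: explicit user setting (including 0) > model profile threshold > default.
function resolveMinScore(user: number | undefined, provider: EmbedderProvider, modelId?: string): number {
    if (user !== undefined) return user // a user value of 0 is honored, hence the !== undefined check
    const id = modelId ?? getDefaultModelId(provider)
    return getModelScoreThreshold(provider, id) ?? SEARCH_MIN_SCORE
}

resolveMinScore(0.9, "ollama", "nomic-embed-code") // 0.9 (user setting wins over the 0.15 profile)
resolveMinScore(undefined, "ollama", "nomic-embed-code") // 0.15 (model-specific threshold)
resolveMinScore(undefined, "openai", "unknown-model") // 0.4 (default fallback)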
9 changes: 8 additions & 1 deletion src/services/code-index/embedders/ollama.ts
@@ -1,5 +1,6 @@
import { ApiHandlerOptions } from "../../../shared/api"
import { EmbedderInfo, EmbeddingResponse, IEmbedder } from "../interfaces"
import { getModelQueryPrefix } from "../../../shared/embeddingModels"
import { t } from "../../../i18n"

/**
@@ -25,6 +26,12 @@ export class CodeIndexOllamaEmbedder implements IEmbedder {
const modelToUse = model || this.defaultModelId
const url = `${this.baseUrl}/api/embed` // Endpoint as specified

// Apply model-specific query prefix if required
const queryPrefix = getModelQueryPrefix("ollama", modelToUse)
const processedTexts = queryPrefix
? texts.map((text) => (text.startsWith(queryPrefix) ? text : `${queryPrefix}${text}`))
: texts

try {
// Note: Standard Ollama API uses 'prompt' for single text, not 'input' for array.
// Implementing based on user's specific request structure.
@@ -35,7 +42,7 @@
},
body: JSON.stringify({
model: modelToUse,
input: texts, // Using 'input' as requested
input: processedTexts, // Using 'input' as requested
}),
})

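
Note the startsWith guard in the hunk above: prefixing is idempotent, so a text that already carries the prefix (for example on a retried batch) is not prefixed twice. A sketch of the transform in isolation, reusing getModelQueryPrefix from this diff:

import { getModelQueryPrefix } from "../../../shared/embeddingModels"

// Prepend the model's query prefix, skipping texts that already start with it.
function applyQueryPrefix(texts: string[], modelId: string): string[] {
    const prefix = getModelQueryPrefix("ollama", modelId)
    if (!prefix) return texts // most models define no prefix
    return texts.map((t) => (t.startsWith(prefix) ? t : `${prefix}${t}`))
}

applyQueryPrefix(["function add(a, b)"], "nomic-embed-code")
// ["Represent this query for searching relevant code: function add(a, b)"]
applyQueryPrefix(["function add(a, b)"], "nomic-embed-text") // unchanged: no prefix in its profile

(The openai-compatible embedder below applies the same prefix without this guard.)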
9 changes: 7 additions & 2 deletions src/services/code-index/embedders/openai-compatible.ts
@@ -6,7 +6,7 @@ import {
MAX_BATCH_RETRIES as MAX_RETRIES,
INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS,
} from "../constants"
import { getDefaultModelId } from "../../../shared/embeddingModels"
import { getDefaultModelId, getModelQueryPrefix } from "../../../shared/embeddingModels"
import { t } from "../../../i18n"

interface EmbeddingItem {
@@ -59,9 +59,14 @@
*/
async createEmbeddings(texts: string[], model?: string): Promise<EmbeddingResponse> {
const modelToUse = model || this.defaultModelId

// Apply model-specific query prefix if required
const queryPrefix = getModelQueryPrefix("openai-compatible", modelToUse)
const processedTexts = queryPrefix ? texts.map((text) => `${queryPrefix}${text}`) : texts

const allEmbeddings: number[][] = []
const usage = { promptTokens: 0, totalTokens: 0 }
const remainingTexts = [...texts]
const remainingTexts = [...processedTexts]

while (remainingTexts.length > 0) {
const currentBatch: string[] = []
62 changes: 53 additions & 9 deletions src/shared/embeddingModels.ts
@@ -6,6 +6,8 @@ export type EmbedderProvider = "openai" | "ollama" | "openai-compatible" // Add

export interface EmbeddingModelProfile {
dimension: number
scoreThreshold?: number // Model-specific minimum score threshold for semantic search
queryPrefix?: string // Optional prefix required by the model for queries
// Add other model-specific properties if needed, e.g., context window size
}

@@ -18,21 +20,31 @@
// Example profiles - expand this list as needed
export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = {
openai: {
"text-embedding-3-small": { dimension: 1536 },
"text-embedding-3-large": { dimension: 3072 },
"text-embedding-ada-002": { dimension: 1536 },
"text-embedding-3-small": { dimension: 1536, scoreThreshold: 0.4 },
"text-embedding-3-large": { dimension: 3072, scoreThreshold: 0.4 },
"text-embedding-ada-002": { dimension: 1536, scoreThreshold: 0.4 },
},
ollama: {
"nomic-embed-text": { dimension: 768 },
"mxbai-embed-large": { dimension: 1024 },
"all-minilm": { dimension: 384 },
"nomic-embed-text": { dimension: 768, scoreThreshold: 0.4 },
"nomic-embed-code": {
dimension: 3584,
scoreThreshold: 0.15,
queryPrefix: "Represent this query for searching relevant code: ",
},
"mxbai-embed-large": { dimension: 1024, scoreThreshold: 0.4 },
"all-minilm": { dimension: 384, scoreThreshold: 0.4 },
// Add default Ollama model if applicable, e.g.:
// 'default': { dimension: 768 } // Assuming a default dimension
},
"openai-compatible": {
"text-embedding-3-small": { dimension: 1536 },
"text-embedding-3-large": { dimension: 3072 },
"text-embedding-ada-002": { dimension: 1536 },
"text-embedding-3-small": { dimension: 1536, scoreThreshold: 0.4 },
"text-embedding-3-large": { dimension: 3072, scoreThreshold: 0.4 },
"text-embedding-ada-002": { dimension: 1536, scoreThreshold: 0.4 },
"nomic-embed-code": {
dimension: 3584,
scoreThreshold: 0.15,
queryPrefix: "Represent this query for searching relevant code: ",
},
},
}

@@ -59,6 +71,38 @@
return modelProfile.dimension
}

/**
* Retrieves the score threshold for a given provider and model ID.
* @param provider The embedder provider (e.g., "openai").
* @param modelId The specific model ID (e.g., "text-embedding-3-small").
* @returns The score threshold or undefined if the model is not found.
*/
export function getModelScoreThreshold(provider: EmbedderProvider, modelId: string): number | undefined {
const providerProfiles = EMBEDDING_MODEL_PROFILES[provider]
if (!providerProfiles) {
return undefined
}

const modelProfile = providerProfiles[modelId]
return modelProfile?.scoreThreshold
}

/**
* Retrieves the query prefix for a given provider and model ID.
* @param provider The embedder provider (e.g., "openai").
* @param modelId The specific model ID (e.g., "nomic-embed-code").
* @returns The query prefix or undefined if the model doesn't require one.
*/
export function getModelQueryPrefix(provider: EmbedderProvider, modelId: string): string | undefined {
const providerProfiles = EMBEDDING_MODEL_PROFILES[provider]
if (!providerProfiles) {
return undefined
}

const modelProfile = providerProfiles[modelId]
return modelProfile?.queryPrefix
}

/**
* Gets the default *specific* embedding model ID based on the provider.
* Does not include the provider prefix.
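
For reference, the two new helpers resolve directly against EMBEDDING_MODEL_PROFILES; the expected outputs below are taken from the profiles in this diff:

import { getModelScoreThreshold, getModelQueryPrefix } from "./embeddingModels"

getModelScoreThreshold("ollama", "nomic-embed-code") // 0.15
getModelScoreThreshold("openai", "text-embedding-3-small") // 0.4
getModelScoreThreshold("openai", "no-such-model") // undefined: caller falls back to SEARCH_MIN_SCORE

getModelQueryPrefix("ollama", "nomic-embed-code")
// "Represent this query for searching relevant code: "
getModelQueryPrefix("openai", "text-embedding-3-large") // undefined: no prefix required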