17 changes: 9 additions & 8 deletions src/services/code-index/config-manager.ts
@@ -3,7 +3,7 @@ import { ContextProxy } from "../../core/config/ContextProxy"
import { EmbedderProvider } from "./interfaces/manager"
import { CodeIndexConfig, PreviousConfigSnapshot } from "./interfaces/config"
import { SEARCH_MIN_SCORE } from "./constants"
- import { getDefaultModelId, getModelDimension } from "../../shared/embeddingModels"
+ import { getDefaultModelId, getModelDimension, getModelScoreThreshold } from "../../shared/embeddingModels"

/**
* Manages configuration state and validation for the code indexing feature.
@@ -18,7 +18,6 @@ export class CodeIndexConfigManager {
private openAiCompatibleOptions?: { baseUrl: string; apiKey: string; modelDimension?: number }
private qdrantUrl?: string = "http://localhost:6333"
private qdrantApiKey?: string
- private searchMinScore?: number

constructor(private readonly contextProxy: ContextProxy) {
// Initialize with current configuration to avoid false restart triggers
@@ -61,7 +60,6 @@
this.qdrantUrl = codebaseIndexQdrantUrl
this.qdrantApiKey = qdrantApiKey ?? ""
this.openAiOptions = { openAiNativeApiKey: openAiKey }
- this.searchMinScore = SEARCH_MIN_SCORE

// Set embedder provider with support for openai-compatible
if (codebaseIndexEmbedderProvider === "ollama") {
@@ -139,7 +137,7 @@
openAiCompatibleOptions: this.openAiCompatibleOptions,
qdrantUrl: this.qdrantUrl,
qdrantApiKey: this.qdrantApiKey,
- searchMinScore: this.searchMinScore,
+ searchMinScore: this.currentSearchMinScore,
},
requiresRestart,
}
@@ -294,7 +292,7 @@
openAiCompatibleOptions: this.openAiCompatibleOptions,
qdrantUrl: this.qdrantUrl,
qdrantApiKey: this.qdrantApiKey,
- searchMinScore: this.searchMinScore,
+ searchMinScore: this.currentSearchMinScore,
}
}

@@ -337,9 +335,12 @@
}

/**
- * Gets the configured minimum search score.
+ * Gets the configured minimum search score based on the current model.
+ * Falls back to the constant SEARCH_MIN_SCORE if no model-specific threshold is found.
*/
- public get currentSearchMinScore(): number | undefined {
- return this.searchMinScore
+ public get currentSearchMinScore(): number {
+ const currentModelId = this.modelId ?? getDefaultModelId(this.embedderProvider)
+ const modelSpecificThreshold = getModelScoreThreshold(this.embedderProvider, currentModelId)
+ return modelSpecificThreshold ?? SEARCH_MIN_SCORE
}
}
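Note: as a standalone sketch, the new getter's resolution order is equivalent to the following. resolveMinScore is a hypothetical free-function restatement of the getter above, and the value of SEARCH_MIN_SCORE lives in ./constants and is not shown in this diff:

function resolveMinScore(provider: EmbedderProvider, modelId?: string): number {
	// Resolve the effective model first, then look up its threshold.
	const currentModelId = modelId ?? getDefaultModelId(provider)
	return getModelScoreThreshold(provider, currentModelId) ?? SEARCH_MIN_SCORE
}
// e.g. resolveMinScore("ollama", "nomic-embed-code") === 0.15 (model-specific threshold)
// e.g. resolveMinScore("ollama", "unknown-model") === SEARCH_MIN_SCORE (fallback constant)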
7 changes: 6 additions & 1 deletion src/services/code-index/embedders/ollama.ts
@@ -1,5 +1,6 @@
import { ApiHandlerOptions } from "../../../shared/api"
import { EmbedderInfo, EmbeddingResponse, IEmbedder } from "../interfaces"
+ import { getModelQueryPrefix } from "../../../shared/embeddingModels"
import { t } from "../../../i18n"

/**
@@ -25,6 +26,10 @@ export class CodeIndexOllamaEmbedder implements IEmbedder {
const modelToUse = model || this.defaultModelId
const url = `${this.baseUrl}/api/embed` // Endpoint as specified

+ // Apply model-specific query prefix if required
+ const queryPrefix = getModelQueryPrefix("ollama", modelToUse)
+ const processedTexts = queryPrefix ? texts.map((text) => `${queryPrefix}${text}`) : texts

try {
// Note: Standard Ollama API uses 'prompt' for single text, not 'input' for array.
// Implementing based on user's specific request structure.
@@ -35,7 +40,7 @@
},
body: JSON.stringify({
model: modelToUse,
- input: texts, // Using 'input' as requested
+ input: processedTexts, // Using 'input' as requested
}),
})

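Note: the effect of the new prefix logic, as a small self-contained example (the sample input text is illustrative only):

const queryPrefix = getModelQueryPrefix("ollama", "nomic-embed-code")
// => "Represent this query for searching relevant code: "
const texts = ["function add(a, b) { return a + b }"]
const processedTexts = queryPrefix ? texts.map((text) => `${queryPrefix}${text}`) : texts
// processedTexts[0] now starts with the prefix; models without a queryPrefix pass through unchanged.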
9 changes: 7 additions & 2 deletions src/services/code-index/embedders/openai-compatible.ts
@@ -6,7 +6,7 @@ import {
MAX_BATCH_RETRIES as MAX_RETRIES,
INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS,
} from "../constants"
- import { getDefaultModelId } from "../../../shared/embeddingModels"
+ import { getDefaultModelId, getModelQueryPrefix } from "../../../shared/embeddingModels"
import { t } from "../../../i18n"

interface EmbeddingItem {
@@ -59,9 +59,14 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
*/
async createEmbeddings(texts: string[], model?: string): Promise<EmbeddingResponse> {
const modelToUse = model || this.defaultModelId

+ // Apply model-specific query prefix if required
+ const queryPrefix = getModelQueryPrefix("openai-compatible", modelToUse)
+ const processedTexts = queryPrefix ? texts.map((text) => `${queryPrefix}${text}`) : texts

const allEmbeddings: number[][] = []
const usage = { promptTokens: 0, totalTokens: 0 }
- const remainingTexts = [...texts]
+ const remainingTexts = [...processedTexts]

while (remainingTexts.length > 0) {
const currentBatch: string[] = []
8 changes: 7 additions & 1 deletion src/services/code-index/embedders/openai.ts
@@ -8,6 +8,7 @@ import {
MAX_BATCH_RETRIES as MAX_RETRIES,
INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS,
} from "../constants"
+ import { getModelQueryPrefix } from "../../../shared/embeddingModels"
import { t } from "../../../i18n"

/**
@@ -36,9 +37,14 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder {
*/
async createEmbeddings(texts: string[], model?: string): Promise<EmbeddingResponse> {
const modelToUse = model || this.defaultModelId

+ // Apply model-specific query prefix if required
+ const queryPrefix = getModelQueryPrefix("openai", modelToUse)
+ const processedTexts = queryPrefix ? texts.map((text) => `${queryPrefix}${text}`) : texts

const allEmbeddings: number[][] = []
const usage = { promptTokens: 0, totalTokens: 0 }
- const remainingTexts = [...texts]
+ const remainingTexts = [...processedTexts]

while (remainingTexts.length > 0) {
const currentBatch: string[] = []
2 changes: 1 addition & 1 deletion src/services/code-index/interfaces/config.ts
@@ -14,7 +14,7 @@ export interface CodeIndexConfig {
openAiCompatibleOptions?: { baseUrl: string; apiKey: string; modelDimension?: number }
qdrantUrl?: string
qdrantApiKey?: string
- searchMinScore?: number
+ searchMinScore: number
}

/**
62 changes: 53 additions & 9 deletions src/shared/embeddingModels.ts
@@ -6,6 +6,8 @@ export type EmbedderProvider = "openai" | "ollama" | "openai-compatible" // Add

export interface EmbeddingModelProfile {
dimension: number
+ scoreThreshold?: number // Model-specific minimum score threshold for semantic search
+ queryPrefix?: string // Optional prefix required by the model for queries
// Add other model-specific properties if needed, e.g., context window size
}

@@ -18,21 +20,31 @@ export type EmbeddingModelProfiles = {
// Example profiles - expand this list as needed
export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = {
openai: {
"text-embedding-3-small": { dimension: 1536 },
"text-embedding-3-large": { dimension: 3072 },
"text-embedding-ada-002": { dimension: 1536 },
"text-embedding-3-small": { dimension: 1536, scoreThreshold: 0.4 },
"text-embedding-3-large": { dimension: 3072, scoreThreshold: 0.4 },
"text-embedding-ada-002": { dimension: 1536, scoreThreshold: 0.4 },
},
ollama: {
"nomic-embed-text": { dimension: 768 },
"mxbai-embed-large": { dimension: 1024 },
"all-minilm": { dimension: 384 },
"nomic-embed-text": { dimension: 768, scoreThreshold: 0.4 },
"nomic-embed-code": {
dimension: 3584,
scoreThreshold: 0.15,
queryPrefix: "Represent this query for searching relevant code: ",
[Review comment — Copilot AI, Jun 23, 2025]
Extract this query prefix literal into a constant (e.g., NOMIC_EMBED_CODE_PREFIX) to reduce duplication and improve readability.
Suggested change:
- queryPrefix: "Represent this query for searching relevant code: ",
+ queryPrefix: NOMIC_EMBED_CODE_PREFIX,
+ },
+ "mxbai-embed-large": { dimension: 1024, scoreThreshold: 0.4 },
+ "all-minilm": { dimension: 384, scoreThreshold: 0.4 },
// Add default Ollama model if applicable, e.g.:
// 'default': { dimension: 768 } // Assuming a default dimension
},
"openai-compatible": {
"text-embedding-3-small": { dimension: 1536 },
"text-embedding-3-large": { dimension: 3072 },
"text-embedding-ada-002": { dimension: 1536 },
"text-embedding-3-small": { dimension: 1536, scoreThreshold: 0.4 },
"text-embedding-3-large": { dimension: 3072, scoreThreshold: 0.4 },
"text-embedding-ada-002": { dimension: 1536, scoreThreshold: 0.4 },
[Review comment on lines +23 to +42 — Copilot AI, Jun 23, 2025]
Consider extracting the repeated default scoreThreshold value (0.4) into a named constant (e.g., DEFAULT_SCORE_THRESHOLD) to avoid duplication and ease future updates. (A sketch applying both review suggestions follows the profile map below.)
"nomic-embed-code": {
dimension: 3584,
scoreThreshold: 0.15,
queryPrefix: "Represent this query for searching relevant code: ",
},
},
}

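Note: a minimal sketch of how the two review suggestions above could be applied. The constant names NOMIC_EMBED_CODE_PREFIX and DEFAULT_SCORE_THRESHOLD come from the comments and are not part of this PR:

// Hypothetical constants; values copied from the profiles above.
const DEFAULT_SCORE_THRESHOLD = 0.4
const NOMIC_EMBED_CODE_PREFIX = "Represent this query for searching relevant code: "

export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = {
	openai: {
		"text-embedding-3-small": { dimension: 1536, scoreThreshold: DEFAULT_SCORE_THRESHOLD },
		// ...remaining entries as above, with the 0.4 literals swapped for the constant
	},
	ollama: {
		"nomic-embed-code": {
			dimension: 3584,
			scoreThreshold: 0.15,
			queryPrefix: NOMIC_EMBED_CODE_PREFIX,
		},
		// ...
	},
	// ...
}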
@@ -59,6 +71,38 @@ export function getModelDimension(provider: EmbedderProvider, modelId: string):
return modelProfile.dimension
}

+ /**
+ * Retrieves the score threshold for a given provider and model ID.
+ * @param provider The embedder provider (e.g., "openai").
+ * @param modelId The specific model ID (e.g., "text-embedding-3-small").
+ * @returns The score threshold or undefined if the model is not found.
+ */
+ export function getModelScoreThreshold(provider: EmbedderProvider, modelId: string): number | undefined {
+ const providerProfiles = EMBEDDING_MODEL_PROFILES[provider]
+ if (!providerProfiles) {
+ return undefined
+ }
+
+ const modelProfile = providerProfiles[modelId]
+ return modelProfile?.scoreThreshold
+ }
+
+ /**
+ * Retrieves the query prefix for a given provider and model ID.
+ * @param provider The embedder provider (e.g., "openai").
+ * @param modelId The specific model ID (e.g., "nomic-embed-code").
+ * @returns The query prefix or undefined if the model doesn't require one.
+ */
+ export function getModelQueryPrefix(provider: EmbedderProvider, modelId: string): string | undefined {
+ const providerProfiles = EMBEDDING_MODEL_PROFILES[provider]
+ if (!providerProfiles) {
+ return undefined
+ }
+
+ const modelProfile = providerProfiles[modelId]
+ return modelProfile?.queryPrefix
+ }

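Note: a quick usage sketch of the two new helpers against the profiles defined earlier (the return values follow directly from the profile map; the call sites are illustrative only):

getModelScoreThreshold("ollama", "nomic-embed-code") // => 0.15
getModelQueryPrefix("ollama", "nomic-embed-code") // => "Represent this query for searching relevant code: "
getModelScoreThreshold("openai", "text-embedding-3-small") // => 0.4
getModelQueryPrefix("openai", "text-embedding-3-small") // => undefined (no prefix required)
getModelScoreThreshold("openai", "no-such-model") // => undefined, so callers fall back to SEARCH_MIN_SCORE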
/**
* Gets the default *specific* embedding model ID based on the provider.
* Does not include the provider prefix.