diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts
index 44937da235..af61e2bfb7 100644
--- a/packages/types/src/index.ts
+++ b/packages/types/src/index.ts
@@ -21,3 +21,4 @@ export * from "./tool.js"
 export * from "./type-fu.js"
 export * from "./vscode.js"
 export * from "./todo.js"
+export * from "./qdrant.js"
diff --git a/packages/types/src/qdrant.ts b/packages/types/src/qdrant.ts
new file mode 100644
index 0000000000..8bfb79aa03
--- /dev/null
+++ b/packages/types/src/qdrant.ts
@@ -0,0 +1,122 @@
+/**
+ * Qdrant Vector Store Configuration Constants
+ *
+ * These constants define default values for Qdrant memory optimization settings
+ * to reduce RAM usage by storing vectors and indexes on disk instead of in memory.
+ */
+
+/**
+ * Default memory optimization settings for Qdrant
+ */
+export const QDRANT_MEMORY_OPTIMIZATION_DEFAULTS = {
+	/**
+	 * Enable on-disk storage for vectors and HNSW indexes by default
+	 * This significantly reduces memory usage at the cost of slightly slower access
+	 */
+	USE_ON_DISK_STORAGE: true,
+
+	/**
+	 * Segment size, in kilobytes of vector data, above which Qdrant stores a segment as a memory-mapped file
+	 * (passed as `memmap_threshold`; Qdrant measures this in KB, not in number of vectors)
+	 */
+	MEMORY_MAP_THRESHOLD: 50000,
+
+	/**
+	 * HNSW search parameter (ef) - size of the candidate list examined during search
+	 * Higher values = better search quality but slower queries
+	 * Lower values = faster queries but potentially lower search quality
+	 * Default: 128 (original value, not reduced for testing purposes)
+	 */
+	HNSW_EF_SEARCH: 128,
+} as const
+
+/**
+ * HNSW (Hierarchical Navigable Small World) index configuration constants
+ */
+export const QDRANT_HNSW_CONFIG_DEFAULTS = {
+	/**
+	 * Number of bi-directional links created for each node during construction
+	 */
+	M: 16,
+
+	/**
+	 * Size of the dynamic list during index construction
+	 */
+	EF_CONSTRUCT: 100,
+
+	/**
+	 * Size threshold in kilobytes of vector data below which Qdrant prefers a full scan over the HNSW index
+	 */
+	FULL_SCAN_THRESHOLD: 10000,
+
+	/**
+	 * Maximum number of threads for indexing (0 = let Qdrant select automatically)
+	 */
+	MAX_INDEXING_THREADS: 0,
+
+	/**
+	 * M value for payload-aware HNSW links (null = use the default M)
+	 */
+	PAYLOAD_M: null,
+} as const
+
+/**
+ * Optimizer configuration constants for memory-mapped storage
+ */
+export const QDRANT_OPTIMIZER_CONFIG_DEFAULTS = {
+	/**
+	 * Trigger optimization when this fraction of vectors in a segment is deleted (0.2 = 20%)
+	 */
+	DELETED_THRESHOLD: 0.2,
+
+	/**
+	 * Minimum number of vectors in a segment before vacuum operation
+	 */
+	VACUUM_MIN_VECTOR_NUMBER: 1000,
+
+	/**
+	 * Default number of segments to create
+	 */
+	DEFAULT_SEGMENT_NUMBER: 2,
+
+	/**
+	 * Maximum segment size in kilobytes (null = let Qdrant choose automatically)
+	 */
+	MAX_SEGMENT_SIZE: null,
+
+	/**
+	 * Start building the vector index once a segment exceeds this size in kilobytes of vector data
+	 */
+	INDEXING_THRESHOLD: 20000,
+
+	/**
+	 * Flush to disk interval in seconds
+	 */
+	FLUSH_INTERVAL_SEC: 5,
+
+	/**
+	 * Maximum optimization threads (null = let Qdrant decide; 0 disables optimizations per the Qdrant API reference)
+	 */
+	MAX_OPTIMIZATION_THREADS: null,
+} as const
+
+/**
+ * Quantization parameters sent with search queries
+ * They only influence results when the collection has quantized vectors; collection creation in this patch sets no quantization_config
+ */
+export const QDRANT_QUANTIZATION_CONFIG_DEFAULTS = {
+	/**
+	 * false = do not ignore quantized vectors during search
+	 */
+	IGNORE: false,
+
+	/**
+	 * Rescore the top candidates with the original vectors for accuracy
+	 */
+	RESCORE: true,
+
+	/**
+	 * Oversampling factor: pre-select this many times more candidates before rescoring
+	 */
+	OVERSAMPLING: 2.0,
+} as const
diff --git a/src/services/code-index/__tests__/config-manager.spec.ts b/src/services/code-index/__tests__/config-manager.spec.ts
index 2d6e704d76..673eeb046a 100644
--- a/src/services/code-index/__tests__/config-manager.spec.ts
+++ b/src/services/code-index/__tests__/config-manager.spec.ts
@@ -104,8 +104,12 @@ describe("CodeIndexConfigManager", () => {
 				isConfigured: false,
 				embedderProvider: "openai",
 				modelId: undefined,
+				modelDimension: undefined,
 				openAiOptions: { openAiNativeApiKey: "" },
 				ollamaOptions: { ollamaBaseUrl: "" },
+				openAiCompatibleOptions: undefined,
+				geminiOptions: undefined,
+				mistralOptions: undefined,
 				qdrantUrl: "http://localhost:6333",
 				qdrantApiKey: "",
 				searchMinScore: 0.4,
@@ -135,8 +139,12 @@ describe("CodeIndexConfigManager", () => {
 				isConfigured: true,
 				embedderProvider: "openai",
 				modelId: "text-embedding-3-large",
+				modelDimension: undefined,
 				openAiOptions: { openAiNativeApiKey: "test-openai-key" },
 				ollamaOptions: { ollamaBaseUrl: "" },
+				openAiCompatibleOptions: undefined,
+				geminiOptions: undefined,
+				mistralOptions: undefined,
 				qdrantUrl: "http://qdrant.local",
 				qdrantApiKey: "test-qdrant-key",
 				searchMinScore: 0.4,
@@ -168,12 +176,15 @@ describe("CodeIndexConfigManager", () => {
 				isConfigured: true,
 				embedderProvider: "openai-compatible",
 				modelId: "text-embedding-3-large",
+				modelDimension: undefined,
 				openAiOptions: { openAiNativeApiKey: "" },
 				ollamaOptions: { ollamaBaseUrl: "" },
 				openAiCompatibleOptions: {
 					baseUrl: "https://api.example.com/v1",
 					apiKey: "test-openai-compatible-key",
 				},
+				geminiOptions: undefined,
+				mistralOptions: undefined,
 				qdrantUrl: "http://qdrant.local",
 				qdrantApiKey: "test-qdrant-key",
 				searchMinScore: 0.4,
@@ -212,6 +223,8 @@ describe("CodeIndexConfigManager", () => {
 					baseUrl: "https://api.example.com/v1",
 					apiKey: "test-openai-compatible-key",
 				},
+				geminiOptions: undefined,
+				mistralOptions: undefined,
 				qdrantUrl: "http://qdrant.local",
 				qdrantApiKey: "test-qdrant-key",
 				searchMinScore: 0.4,
@@ -243,6 +256,7 @@ describe("CodeIndexConfigManager", () => {
 				isConfigured: true,
 				embedderProvider: "openai-compatible",
 				modelId: "custom-model",
+				modelDimension: undefined,
 				openAiOptions: { openAiNativeApiKey: "" },
 				ollamaOptions: { ollamaBaseUrl: "" },
 				openAiCompatibleOptions: {
@@ -250,6 +264,8 @@ describe("CodeIndexConfigManager", () => {
 					apiKey: "test-openai-compatible-key",
 					// modelDimension is undefined when not set
 				},
+				geminiOptions: undefined,
+				mistralOptions: undefined,
 				qdrantUrl: "http://qdrant.local",
 				qdrantApiKey: "test-qdrant-key",
 				searchMinScore: 0.4,
@@ -289,6 +305,7 @@ describe("CodeIndexConfigManager", () => {
 					apiKey: "test-openai-compatible-key",
 				},
 				geminiOptions: undefined,
+				mistralOptions: undefined,
 				qdrantUrl: "http://qdrant.local",
 				qdrantApiKey: "test-qdrant-key",
 				searchMinScore: 0.4,
@@ -1292,10 +1309,12 @@ describe("CodeIndexConfigManager", () => {
 				isConfigured: true,
 				embedderProvider: "openai",
 				modelId: "text-embedding-3-large",
+				modelDimension: undefined,
 				openAiOptions: { openAiNativeApiKey: "test-openai-key" },
 				ollamaOptions: { ollamaBaseUrl: undefined },
 				geminiOptions: undefined,
 				openAiCompatibleOptions: undefined,
+				mistralOptions: undefined,
 				qdrantUrl: "http://qdrant.local",
 				qdrantApiKey: "test-qdrant-key",
 				searchMinScore: 0.4,
diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts
index 68b0f5c0bc..f30d905e03 100644
--- a/src/services/code-index/service-factory.ts
+++ b/src/services/code-index/service-factory.ts
@@ -136,7 +136,7 @@ export class CodeIndexServiceFactory {
 			throw new Error(t("embeddings:serviceFactory.qdrantUrlMissing"))
 		}
 
-		// Assuming constructor is updated: new QdrantVectorStore(workspacePath, url, vectorSize, apiKey?)
+		// Create QdrantVectorStore (memory optimization is always enabled via constants)
 		return new QdrantVectorStore(this.workspacePath, config.qdrantUrl, vectorSize, config.qdrantApiKey)
 	}
 
diff --git a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
index e539c2edde..4c03d901a9 100644
--- a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
+++ b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
@@ -528,6 +528,25 @@ describe("QdrantVectorStore", () => {
 				vectors: {
 					size: mockVectorSize,
 					distance: "Cosine", // Assuming 'Cosine' is the DISTANCE_METRIC
+					on_disk: true, // Default memory optimization
+				},
+				hnsw_config: {
+					m: 16,
+					ef_construct: 100,
+					full_scan_threshold: 10000,
+					max_indexing_threads: 0,
+					on_disk: true,
+					payload_m: null,
+				},
+				optimizers_config: {
+					deleted_threshold: 0.2,
+					vacuum_min_vector_number: 1000,
+					default_segment_number: 2,
+					max_segment_size: null,
+					memmap_threshold: 50000,
+					indexing_threshold: 20000,
+					flush_interval_sec: 5,
+					max_optimization_threads: null,
 				},
 			})
 			expect(mockQdrantClientInstance.deleteCollection).not.toHaveBeenCalled()
@@ -606,6 +625,25 @@ describe("QdrantVectorStore", () => {
 				vectors: {
 					size: mockVectorSize, // Should use the new, correct vector size
 					distance: "Cosine",
+					on_disk: true, // Default memory optimization
+				},
+				hnsw_config: {
+					m: 16,
+					ef_construct: 100,
+					full_scan_threshold: 10000,
+					max_indexing_threads: 0,
+					on_disk: true,
+					payload_m: null,
+				},
+				optimizers_config: {
+					deleted_threshold: 0.2,
+					vacuum_min_vector_number: 1000,
+					default_segment_number: 2,
+					max_segment_size: null,
+					memmap_threshold: 50000,
+					indexing_threshold: 20000,
+					flush_interval_sec: 5,
+					max_optimization_threads: null,
 				},
 			})
 
@@ -899,6 +937,25 @@ describe("QdrantVectorStore", () => {
 				vectors: {
 					size: newVectorSize, // Should create with new 768 dimensions
 					distance: "Cosine",
+					on_disk: true, // Default memory optimization
+				},
+				hnsw_config: {
+					m: 16,
+					ef_construct: 100,
+					full_scan_threshold: 10000,
+					max_indexing_threads: 0,
+					on_disk: true,
+					payload_m: null,
+				},
+				optimizers_config: {
+					deleted_threshold: 0.2,
+					vacuum_min_vector_number: 1000,
+					default_segment_number: 2,
+					max_segment_size: null,
+					memmap_threshold: 50000,
+					indexing_threshold: 20000,
+					flush_interval_sec: 5,
+					max_optimization_threads: null,
 				},
 			})
 			expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(5)
@@ -1244,8 +1301,13 @@ describe("QdrantVectorStore", () => {
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
-					hnsw_ef: 128,
+					hnsw_ef: 128, // Default memory optimized value
 					exact: false,
+					quantization: {
+						ignore: false,
+						rescore: true,
+						oversampling: 2.0,
+					},
 				},
 				with_payload: {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
@@ -1295,8 +1357,13 @@ describe("QdrantVectorStore", () => {
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
-					hnsw_ef: 128,
+					hnsw_ef: 128, // Default memory optimized value
 					exact: false,
+					quantization: {
+						ignore: false,
+						rescore: true,
+						oversampling: 2.0,
+					},
 				},
 				with_payload: {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
@@ -1321,8 +1388,13 @@ describe("QdrantVectorStore", () => {
 				score_threshold: customMinScore,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
-					hnsw_ef: 128,
+					hnsw_ef: 128, // Default memory optimized value
 					exact: false,
+					quantization: {
+						ignore: false,
+						rescore: true,
+						oversampling: 2.0,
+					},
 				},
 				with_payload: {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
@@ -1345,8 +1417,13 @@ describe("QdrantVectorStore", () => {
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: customMaxResults,
 				params: {
-					hnsw_ef: 128,
+					hnsw_ef: 128, // Default memory optimized value
 					exact: false,
+					quantization: {
+						ignore: false,
+						rescore: true,
+						oversampling: 2.0,
+					},
 				},
 				with_payload: {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
@@ -1492,8 +1569,13 @@ describe("QdrantVectorStore", () => {
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
-					hnsw_ef: 128,
+					hnsw_ef: 128, // Default memory optimized value
 					exact: false,
+					quantization: {
+						ignore: false,
+						rescore: true,
+						oversampling: 2.0,
+					},
 				},
 				with_payload: {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts
index 5121d65b97..875a3ad35e 100644
--- a/src/services/code-index/vector-store/qdrant-client.ts
+++ b/src/services/code-index/vector-store/qdrant-client.ts
@@ -6,6 +6,12 @@ import { IVectorStore } from "../interfaces/vector-store"
 import { Payload, VectorStoreSearchResult } from "../interfaces"
 import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../constants"
 import { t } from "../../../i18n"
+import {
+	QDRANT_MEMORY_OPTIMIZATION_DEFAULTS,
+	QDRANT_HNSW_CONFIG_DEFAULTS,
+	QDRANT_OPTIMIZER_CONFIG_DEFAULTS,
+	QDRANT_QUANTIZATION_CONFIG_DEFAULTS,
+} from "@roo-code/types"
 
 /**
  * Qdrant implementation of the vector store interface
@@ -22,6 +28,8 @@ export class QdrantVectorStore implements IVectorStore {
 	 * Creates a new Qdrant vector store
 	 * @param workspacePath Path to the workspace
 	 * @param url Optional URL to the Qdrant server
+	 * @param vectorSize Size of the vectors
+	 * @param apiKey Optional API key for authentication
 	 */
 	constructor(workspacePath: string, url: string, vectorSize: number, apiKey?: string) {
 		// Parse the URL to determine the appropriate QdrantClient configuration
@@ -155,6 +163,27 @@ export class QdrantVectorStore implements IVectorStore {
 					vectors: {
 						size: this.vectorSize,
 						distance: this.DISTANCE_METRIC,
+						on_disk: QDRANT_MEMORY_OPTIMIZATION_DEFAULTS.USE_ON_DISK_STORAGE,
+					},
+					// Configure HNSW index for memory efficiency
+					hnsw_config: {
+						m: QDRANT_HNSW_CONFIG_DEFAULTS.M,
+						ef_construct: QDRANT_HNSW_CONFIG_DEFAULTS.EF_CONSTRUCT,
+						full_scan_threshold: QDRANT_HNSW_CONFIG_DEFAULTS.FULL_SCAN_THRESHOLD,
+						max_indexing_threads: QDRANT_HNSW_CONFIG_DEFAULTS.MAX_INDEXING_THREADS,
+						on_disk: QDRANT_MEMORY_OPTIMIZATION_DEFAULTS.USE_ON_DISK_STORAGE,
+						payload_m: QDRANT_HNSW_CONFIG_DEFAULTS.PAYLOAD_M,
+					},
+					// Enable memory-mapped storage for better memory management
+					optimizers_config: {
+						deleted_threshold: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.DELETED_THRESHOLD,
+						vacuum_min_vector_number: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.VACUUM_MIN_VECTOR_NUMBER,
+						default_segment_number: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.DEFAULT_SEGMENT_NUMBER,
+						max_segment_size: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.MAX_SEGMENT_SIZE,
+						memmap_threshold: QDRANT_MEMORY_OPTIMIZATION_DEFAULTS.MEMORY_MAP_THRESHOLD,
+						indexing_threshold: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.INDEXING_THRESHOLD,
+						flush_interval_sec: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.FLUSH_INTERVAL_SEC,
+						max_optimization_threads: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.MAX_OPTIMIZATION_THREADS,
 					},
 				})
 				created = true
@@ -244,6 +273,27 @@ export class QdrantVectorStore implements IVectorStore {
 				vectors: {
 					size: this.vectorSize,
 					distance: this.DISTANCE_METRIC,
+					on_disk: QDRANT_MEMORY_OPTIMIZATION_DEFAULTS.USE_ON_DISK_STORAGE,
+				},
+				// Configure HNSW index for memory efficiency
+				hnsw_config: {
+					m: QDRANT_HNSW_CONFIG_DEFAULTS.M,
+					ef_construct: QDRANT_HNSW_CONFIG_DEFAULTS.EF_CONSTRUCT,
+					full_scan_threshold: QDRANT_HNSW_CONFIG_DEFAULTS.FULL_SCAN_THRESHOLD,
+					max_indexing_threads: QDRANT_HNSW_CONFIG_DEFAULTS.MAX_INDEXING_THREADS,
+					on_disk: QDRANT_MEMORY_OPTIMIZATION_DEFAULTS.USE_ON_DISK_STORAGE,
+					payload_m: QDRANT_HNSW_CONFIG_DEFAULTS.PAYLOAD_M,
+				},
+				// Enable memory-mapped storage for better memory management
+				optimizers_config: {
+					deleted_threshold: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.DELETED_THRESHOLD,
+					vacuum_min_vector_number: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.VACUUM_MIN_VECTOR_NUMBER,
+					default_segment_number: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.DEFAULT_SEGMENT_NUMBER,
+					max_segment_size: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.MAX_SEGMENT_SIZE,
+					memmap_threshold: QDRANT_MEMORY_OPTIMIZATION_DEFAULTS.MEMORY_MAP_THRESHOLD,
+					indexing_threshold: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.INDEXING_THRESHOLD,
+					flush_interval_sec: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.FLUSH_INTERVAL_SEC,
+					max_optimization_threads: QDRANT_OPTIMIZER_CONFIG_DEFAULTS.MAX_OPTIMIZATION_THREADS,
 				},
 			})
 			console.log(`[QdrantVectorStore] Successfully created new collection ${this.collectionName}`)
@@ -391,8 +441,13 @@ export class QdrantVectorStore implements IVectorStore {
 				score_threshold: minScore ?? DEFAULT_SEARCH_MIN_SCORE,
 				limit: maxResults ?? DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
-					hnsw_ef: 128,
+					hnsw_ef: QDRANT_MEMORY_OPTIMIZATION_DEFAULTS.HNSW_EF_SEARCH,
 					exact: false,
+					quantization: {
+						ignore: QDRANT_QUANTIZATION_CONFIG_DEFAULTS.IGNORE,
+						rescore: QDRANT_QUANTIZATION_CONFIG_DEFAULTS.RESCORE,
+						oversampling: QDRANT_QUANTIZATION_CONFIG_DEFAULTS.OVERSAMPLING,
+					},
 				},
 				with_payload: {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],