From 351ca02394ee5f511edae48c296f4455a4920d43 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 12 Sep 2025 22:00:42 +0000 Subject: [PATCH 1/2] feat: add deterministic naming for Qdrant collections - Add support for custom collection names via .roo/codebase-index.json - Use git repository URL for deterministic collection naming across worktrees - Fall back to workspace path hash when no git repo is available - Normalize git URLs for consistent hashing - Add comprehensive tests for new naming strategies Fixes #7940 --- .../__tests__/qdrant-client.spec.ts | 94 +++++++++++++ .../code-index/vector-store/qdrant-client.ts | 133 +++++++++++++++++- 2 files changed, 224 insertions(+), 3 deletions(-) diff --git a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts index 8947c2f3e7..4dda82998c 100644 --- a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts +++ b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts @@ -1,5 +1,6 @@ import { QdrantClient } from "@qdrant/js-client-rest" import { createHash } from "crypto" +import * as fs from "fs" import { QdrantVectorStore } from "../qdrant-client" import { getWorkspacePath } from "../../../../utils/path" @@ -8,6 +9,7 @@ import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../../cons // Mocks vitest.mock("@qdrant/js-client-rest") vitest.mock("crypto") +vitest.mock("fs") vitest.mock("../../../../utils/path") vitest.mock("../../../../i18n", () => ({ t: (key: string, params?: any) => { @@ -68,6 +70,10 @@ describe("QdrantVectorStore", () => { // Mock getWorkspacePath ;(getWorkspacePath as any).mockReturnValue(mockWorkspacePath) + // Mock fs functions - default to no git repo and no config file + ;(fs.existsSync as any).mockReturnValue(false) + ;(fs.readFileSync as any).mockReturnValue("") + vectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey) }) @@ -82,6 +88,7 @@ describe("QdrantVectorStore", () => { "User-Agent": "Roo-Code", }, }) + // When no git repo or custom config, should fall back to workspace-based hash expect(createHash).toHaveBeenCalledWith("sha256") expect(mockCreateHashInstance.update).toHaveBeenCalledWith(mockWorkspacePath) expect(mockCreateHashInstance.digest).toHaveBeenCalledWith("hex") @@ -89,6 +96,93 @@ describe("QdrantVectorStore", () => { expect((vectorStore as any).collectionName).toBe(expectedCollectionName) expect((vectorStore as any).vectorSize).toBe(mockVectorSize) }) + + it("should use custom collection name from .roo/codebase-index.json if available", () => { + // Mock the config file to exist and contain a custom collection name + ;(fs.existsSync as any).mockImplementation((path: string) => { + return path.includes(".roo/codebase-index.json") + }) + ;(fs.readFileSync as any).mockReturnValue( + JSON.stringify({ + collectionName: "my-custom-collection", + }), + ) + + const customVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey) + + // Should use the sanitized custom collection name + expect((customVectorStore as any).collectionName).toBe("my-custom-collection") + }) + + it("should use git repository URL for deterministic naming when available", () => { + // Mock git config to exist + ;(fs.existsSync as any).mockImplementation((path: string) => { + return path.includes(".git") + }) + ;(fs.readFileSync as any).mockImplementation((path: string) => { + if (path.includes(".git/config")) { + return `[remote "origin"] + url = git@github.com:user/repo.git` + } + return "" + }) + + // Mock createHash for the git URL + const gitUrlHash = "gitrepo1234567890abcdef" + mockCreateHashInstance.digest.mockReturnValueOnce(gitUrlHash) + + const gitVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey) + + // Should use repo- prefix with git URL hash + expect((gitVectorStore as any).collectionName).toBe(`repo-${gitUrlHash.substring(0, 16)}`) + + // Verify it hashed the normalized git URL + expect(mockCreateHashInstance.update).toHaveBeenCalledWith("https://github.com/user/repo") + }) + + it("should normalize different git URL formats consistently", () => { + const testCases = [ + { input: "git@github.com:user/repo.git", expected: "https://github.com/user/repo" }, + { input: "https://github.com/user/repo.git", expected: "https://github.com/user/repo" }, + { input: "ssh://git@github.com/user/repo.git", expected: "https://github.com/user/repo" }, + { input: "https://user@github.com/user/repo.git", expected: "https://github.com/user/repo" }, + ] + + testCases.forEach(({ input, expected }) => { + vitest.clearAllMocks() + ;(fs.existsSync as any).mockImplementation((path: string) => path.includes(".git")) + ;(fs.readFileSync as any).mockImplementation((path: string) => { + if (path.includes(".git/config")) { + return `[remote "origin"]\n\turl = ${input}` + } + return "" + }) + + const gitUrlHash = "normalized1234567890abcdef" + mockCreateHashInstance.digest.mockReturnValueOnce(gitUrlHash) + + const store = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey) + + // Verify it hashed the normalized URL + expect(mockCreateHashInstance.update).toHaveBeenCalledWith(expected) + }) + }) + + it("should sanitize custom collection names to be Qdrant-compatible", () => { + ;(fs.existsSync as any).mockImplementation((path: string) => { + return path.includes(".roo/codebase-index.json") + }) + ;(fs.readFileSync as any).mockReturnValue( + JSON.stringify({ + collectionName: "My Custom Collection!@#$%", + }), + ) + + const customVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey) + + // Should sanitize the collection name + expect((customVectorStore as any).collectionName).toBe("my-custom-collection") + }) it("should handle constructor with default URL when none provided", () => { const vectorStoreWithDefaults = new QdrantVectorStore(mockWorkspacePath, undefined as any, mockVectorSize) diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index ce152824a7..3564dd4128 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -1,11 +1,13 @@ import { QdrantClient, Schemas } from "@qdrant/js-client-rest" import { createHash } from "crypto" import * as path from "path" +import * as fs from "fs" import { getWorkspacePath } from "../../../utils/path" import { IVectorStore } from "../interfaces/vector-store" import { Payload, VectorStoreSearchResult } from "../interfaces" import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../constants" import { t } from "../../../i18n" +import { getGitRepositoryInfo } from "../../../utils/git" /** * Qdrant implementation of the vector store interface @@ -77,10 +79,135 @@ export class QdrantVectorStore implements IVectorStore { }) } - // Generate collection name from workspace path - const hash = createHash("sha256").update(workspacePath).digest("hex") + // Generate deterministic collection name this.vectorSize = vectorSize - this.collectionName = `ws-${hash.substring(0, 16)}` + this.collectionName = this.generateCollectionName(workspacePath) + } + + /** + * Generates a deterministic collection name based on repository or workspace + * @param workspacePath Path to the workspace + * @returns Collection name + */ + private generateCollectionName(workspacePath: string): string { + // First, check for a custom collection name in .roo/codebase-index.json + const customName = this.loadCustomCollectionName(workspacePath) + if (customName) { + // Sanitize the custom name to ensure it's valid for Qdrant + return this.sanitizeCollectionName(customName) + } + + // Try to get git repository information for deterministic naming + const gitInfo = this.getGitInfoSync(workspacePath) + if (gitInfo?.repositoryUrl) { + // Use repository URL to generate a deterministic name + // This ensures the same collection name across worktrees and developers + const hash = createHash("sha256").update(gitInfo.repositoryUrl).digest("hex") + return `repo-${hash.substring(0, 16)}` + } + + // Fallback to workspace path hash (original behavior) + const hash = createHash("sha256").update(workspacePath).digest("hex") + return `ws-${hash.substring(0, 16)}` + } + + /** + * Loads custom collection name from .roo/codebase-index.json if it exists + * @param workspacePath Path to the workspace + * @returns Custom collection name or undefined + */ + private loadCustomCollectionName(workspacePath: string): string | undefined { + try { + const configPath = path.join(workspacePath, ".roo", "codebase-index.json") + if (fs.existsSync(configPath)) { + const config = JSON.parse(fs.readFileSync(configPath, "utf8")) + if (config.collectionName && typeof config.collectionName === "string") { + return config.collectionName + } + } + } catch (error) { + // Ignore errors reading config file + console.warn( + `[QdrantVectorStore] Could not read custom collection name from .roo/codebase-index.json:`, + error, + ) + } + return undefined + } + + /** + * Synchronously gets git repository information + * @param workspacePath Path to the workspace + * @returns Git repository info or undefined + */ + private getGitInfoSync(workspacePath: string): { repositoryUrl?: string } | undefined { + try { + const gitDir = path.join(workspacePath, ".git") + + // Check if .git directory exists + if (!fs.existsSync(gitDir)) { + return undefined + } + + // Try to read git config file + const configPath = path.join(gitDir, "config") + if (fs.existsSync(configPath)) { + const configContent = fs.readFileSync(configPath, "utf8") + + // Extract remote URL + const urlMatch = configContent.match(/url\s*=\s*(.+?)(?:\r?\n|$)/m) + if (urlMatch && urlMatch[1]) { + const url = urlMatch[1].trim() + // Normalize the URL to ensure consistency + const normalizedUrl = this.normalizeGitUrl(url) + return { repositoryUrl: normalizedUrl } + } + } + } catch (error) { + // Ignore errors and fall back to workspace-based naming + console.warn(`[QdrantVectorStore] Could not read git repository info:`, error) + } + return undefined + } + + /** + * Normalizes a git URL for consistent hashing + * @param url Git URL to normalize + * @returns Normalized URL + */ + private normalizeGitUrl(url: string): string { + // Remove credentials + let normalized = url.replace(/^https?:\/\/[^@]+@/, "https://") + + // Convert SSH to HTTPS format for consistency + if (normalized.startsWith("git@")) { + normalized = normalized.replace(/^git@([^:]+):/, "https://$1/") + } else if (normalized.startsWith("ssh://")) { + normalized = normalized.replace(/^ssh:\/\/(?:git@)?([^\/]+)\//, "https://$1/") + } + + // Remove .git suffix + normalized = normalized.replace(/\.git$/, "") + + // Convert to lowercase for consistency + normalized = normalized.toLowerCase() + + return normalized + } + + /** + * Sanitizes a collection name to ensure it's valid for Qdrant + * @param name Collection name to sanitize + * @returns Sanitized collection name + */ + private sanitizeCollectionName(name: string): string { + // Qdrant collection names must be alphanumeric with underscores or hyphens + // Max length is typically 255 characters + return name + .toLowerCase() + .replace(/[^a-z0-9_-]/g, "-") + .replace(/^-+|-+$/g, "") // Remove leading/trailing hyphens + .substring(0, 255) } /** From 426e703a0b197425efaeb9754e5871fa061ed3bd Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 12 Sep 2025 22:05:04 +0000 Subject: [PATCH 2/2] refactor: improve git URL normalization and error handling - Add better error handling in normalizeGitUrl method - Remove unused import of getGitRepositoryInfo - Improve URL credential removal logic --- .../code-index/vector-store/qdrant-client.ts | 53 ++++++++++++------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index 3564dd4128..a47996cab2 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -7,7 +7,6 @@ import { IVectorStore } from "../interfaces/vector-store" import { Payload, VectorStoreSearchResult } from "../interfaces" import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../constants" import { t } from "../../../i18n" -import { getGitRepositoryInfo } from "../../../utils/git" /** * Qdrant implementation of the vector store interface @@ -102,7 +101,8 @@ export class QdrantVectorStore implements IVectorStore { if (gitInfo?.repositoryUrl) { // Use repository URL to generate a deterministic name // This ensures the same collection name across worktrees and developers - const hash = createHash("sha256").update(gitInfo.repositoryUrl).digest("hex") + const normalizedUrl = this.normalizeGitUrl(gitInfo.repositoryUrl) + const hash = createHash("sha256").update(normalizedUrl).digest("hex") return `repo-${hash.substring(0, 16)}` } @@ -158,9 +158,7 @@ export class QdrantVectorStore implements IVectorStore { const urlMatch = configContent.match(/url\s*=\s*(.+?)(?:\r?\n|$)/m) if (urlMatch && urlMatch[1]) { const url = urlMatch[1].trim() - // Normalize the URL to ensure consistency - const normalizedUrl = this.normalizeGitUrl(url) - return { repositoryUrl: normalizedUrl } + return { repositoryUrl: url } } } } catch (error) { @@ -176,23 +174,40 @@ export class QdrantVectorStore implements IVectorStore { * @returns Normalized URL */ private normalizeGitUrl(url: string): string { - // Remove credentials - let normalized = url.replace(/^https?:\/\/[^@]+@/, "https://") - - // Convert SSH to HTTPS format for consistency - if (normalized.startsWith("git@")) { - normalized = normalized.replace(/^git@([^:]+):/, "https://$1/") - } else if (normalized.startsWith("ssh://")) { - normalized = normalized.replace(/^ssh:\/\/(?:git@)?([^\/]+)\//, "https://$1/") - } + try { + // Remove credentials from HTTPS URLs + let normalized = url + if (url.startsWith("https://") || url.startsWith("http://")) { + try { + const urlObj = new URL(url) + urlObj.username = "" + urlObj.password = "" + normalized = urlObj.toString() + } catch { + // If URL parsing fails, just remove obvious credentials + normalized = url.replace(/^https?:\/\/[^@]+@/, "https://") + } + } - // Remove .git suffix - normalized = normalized.replace(/\.git$/, "") + // Convert SSH to HTTPS format for consistency + if (normalized.startsWith("git@")) { + normalized = normalized.replace(/^git@([^:]+):/, "https://$1/") + } else if (normalized.startsWith("ssh://")) { + normalized = normalized.replace(/^ssh:\/\/(?:git@)?([^\/]+)\//, "https://$1/") + } + + // Remove .git suffix + normalized = normalized.replace(/\.git$/, "") - // Convert to lowercase for consistency - normalized = normalized.toLowerCase() + // Convert to lowercase for consistency + normalized = normalized.toLowerCase() - return normalized + return normalized + } catch (error) { + // If normalization fails, return the original URL + console.warn(`[QdrantVectorStore] Could not normalize git URL:`, error) + return url.toLowerCase() + } } /**