Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { QdrantClient } from "@qdrant/js-client-rest"
import { createHash } from "crypto"
import * as fs from "fs"

import { QdrantVectorStore } from "../qdrant-client"
import { getWorkspacePath } from "../../../../utils/path"
Expand All @@ -8,6 +9,7 @@ import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../../cons
// Mocks
vitest.mock("@qdrant/js-client-rest")
vitest.mock("crypto")
vitest.mock("fs")
vitest.mock("../../../../utils/path")
vitest.mock("../../../../i18n", () => ({
t: (key: string, params?: any) => {
Expand Down Expand Up @@ -68,6 +70,10 @@ describe("QdrantVectorStore", () => {
// Mock getWorkspacePath
;(getWorkspacePath as any).mockReturnValue(mockWorkspacePath)

// Mock fs functions - default to no git repo and no config file
;(fs.existsSync as any).mockReturnValue(false)
;(fs.readFileSync as any).mockReturnValue("")

vectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)
})

Expand All @@ -82,13 +88,101 @@ describe("QdrantVectorStore", () => {
"User-Agent": "Roo-Code",
},
})
// When no git repo or custom config, should fall back to workspace-based hash
expect(createHash).toHaveBeenCalledWith("sha256")
expect(mockCreateHashInstance.update).toHaveBeenCalledWith(mockWorkspacePath)
expect(mockCreateHashInstance.digest).toHaveBeenCalledWith("hex")
// Access private member for testing constructor logic (not ideal, but necessary here)
expect((vectorStore as any).collectionName).toBe(expectedCollectionName)
expect((vectorStore as any).vectorSize).toBe(mockVectorSize)
})

it("should use custom collection name from .roo/codebase-index.json if available", () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great test coverage! Consider adding a few more edge case tests:

  • Malformed JSON in .roo/codebase-index.json
  • Git repositories with multiple remotes
  • Collection name length edge cases (approaching the 255 character limit)

These would help ensure robustness across different user configurations.

// Mock the config file to exist and contain a custom collection name
;(fs.existsSync as any).mockImplementation((path: string) => {
return path.includes(".roo/codebase-index.json")
})
;(fs.readFileSync as any).mockReturnValue(
JSON.stringify({
collectionName: "my-custom-collection",
}),
)

const customVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

// Should use the sanitized custom collection name
expect((customVectorStore as any).collectionName).toBe("my-custom-collection")
})

it("should use git repository URL for deterministic naming when available", () => {
// Mock git config to exist
;(fs.existsSync as any).mockImplementation((path: string) => {
return path.includes(".git")
})
;(fs.readFileSync as any).mockImplementation((path: string) => {
if (path.includes(".git/config")) {
return `[remote "origin"]
url = [email protected]:user/repo.git`
}
return ""
})

// Mock createHash for the git URL
const gitUrlHash = "gitrepo1234567890abcdef"
mockCreateHashInstance.digest.mockReturnValueOnce(gitUrlHash)

const gitVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

// Should use repo- prefix with git URL hash
expect((gitVectorStore as any).collectionName).toBe(`repo-${gitUrlHash.substring(0, 16)}`)

// Verify it hashed the normalized git URL
expect(mockCreateHashInstance.update).toHaveBeenCalledWith("https://github.com/user/repo")
})

it("should normalize different git URL formats consistently", () => {
const testCases = [
{ input: "[email protected]:user/repo.git", expected: "https://github.com/user/repo" },
{ input: "https://github.com/user/repo.git", expected: "https://github.com/user/repo" },
{ input: "ssh://[email protected]/user/repo.git", expected: "https://github.com/user/repo" },
{ input: "https://[email protected]/user/repo.git", expected: "https://github.com/user/repo" },
]

testCases.forEach(({ input, expected }) => {
vitest.clearAllMocks()
;(fs.existsSync as any).mockImplementation((path: string) => path.includes(".git"))
;(fs.readFileSync as any).mockImplementation((path: string) => {
if (path.includes(".git/config")) {
return `[remote "origin"]\n\turl = ${input}`
}
return ""
})

const gitUrlHash = "normalized1234567890abcdef"
mockCreateHashInstance.digest.mockReturnValueOnce(gitUrlHash)

const store = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

// Verify it hashed the normalized URL
expect(mockCreateHashInstance.update).toHaveBeenCalledWith(expected)
})
})

it("should sanitize custom collection names to be Qdrant-compatible", () => {
;(fs.existsSync as any).mockImplementation((path: string) => {
return path.includes(".roo/codebase-index.json")
})
;(fs.readFileSync as any).mockReturnValue(
JSON.stringify({
collectionName: "My Custom Collection!@#$%",
}),
)

const customVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

// Should sanitize the collection name
expect((customVectorStore as any).collectionName).toBe("my-custom-collection")
})
it("should handle constructor with default URL when none provided", () => {
const vectorStoreWithDefaults = new QdrantVectorStore(mockWorkspacePath, undefined as any, mockVectorSize)

Expand Down
148 changes: 145 additions & 3 deletions src/services/code-index/vector-store/qdrant-client.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { QdrantClient, Schemas } from "@qdrant/js-client-rest"
import { createHash } from "crypto"
import * as path from "path"
import * as fs from "fs"
import { getWorkspacePath } from "../../../utils/path"
import { IVectorStore } from "../interfaces/vector-store"
import { Payload, VectorStoreSearchResult } from "../interfaces"
Expand Down Expand Up @@ -77,10 +78,151 @@ export class QdrantVectorStore implements IVectorStore {
})
}

// Generate collection name from workspace path
const hash = createHash("sha256").update(workspacePath).digest("hex")
// Generate deterministic collection name
this.vectorSize = vectorSize
this.collectionName = `ws-${hash.substring(0, 16)}`
this.collectionName = this.generateCollectionName(workspacePath)
}

/**
* Generates a deterministic collection name based on repository or workspace
* @param workspacePath Path to the workspace
* @returns Collection name
*/
private generateCollectionName(workspacePath: string): string {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding JSDoc comments to these new private methods explaining the fallback hierarchy (custom name → git URL → workspace path). This would make the code more self-documenting for future contributors.

// First, check for a custom collection name in .roo/codebase-index.json
const customName = this.loadCustomCollectionName(workspacePath)
if (customName) {
// Sanitize the custom name to ensure it's valid for Qdrant
return this.sanitizeCollectionName(customName)
}

// Try to get git repository information for deterministic naming
const gitInfo = this.getGitInfoSync(workspacePath)
if (gitInfo?.repositoryUrl) {
// Use repository URL to generate a deterministic name
// This ensures the same collection name across worktrees and developers
const normalizedUrl = this.normalizeGitUrl(gitInfo.repositoryUrl)
const hash = createHash("sha256").update(normalizedUrl).digest("hex")
return `repo-${hash.substring(0, 16)}`
}

// Fallback to workspace path hash (original behavior)
const hash = createHash("sha256").update(workspacePath).digest("hex")
return `ws-${hash.substring(0, 16)}`
}

/**
* Loads custom collection name from .roo/codebase-index.json if it exists
* @param workspacePath Path to the workspace
* @returns Custom collection name or undefined
*/
private loadCustomCollectionName(workspacePath: string): string | undefined {
try {
const configPath = path.join(workspacePath, ".roo", "codebase-index.json")
if (fs.existsSync(configPath)) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor performance note: These synchronous file operations could potentially block the event loop. Since this only happens during initialization it's probably fine, but worth noting for future optimization if initialization performance becomes a concern.

const config = JSON.parse(fs.readFileSync(configPath, "utf8"))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently this silently falls back when JSON parsing fails. Should we consider logging malformed JSON at a higher level than console.warn to help users debug configuration issues? Or perhaps add validation for the expected schema?

if (config.collectionName && typeof config.collectionName === "string") {
return config.collectionName
}
}
} catch (error) {
// Ignore errors reading config file
console.warn(
`[QdrantVectorStore] Could not read custom collection name from .roo/codebase-index.json:`,
error,
)
}
return undefined
}

/**
* Synchronously gets git repository information
* @param workspacePath Path to the workspace
* @returns Git repository info or undefined
*/
private getGitInfoSync(workspacePath: string): { repositoryUrl?: string } | undefined {
try {
const gitDir = path.join(workspacePath, ".git")

// Check if .git directory exists
if (!fs.existsSync(gitDir)) {
return undefined
}

// Try to read git config file
const configPath = path.join(gitDir, "config")
if (fs.existsSync(configPath)) {
const configContent = fs.readFileSync(configPath, "utf8")

// Extract remote URL
const urlMatch = configContent.match(/url\s*=\s*(.+?)(?:\r?\n|$)/m)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This regex pattern might not handle all edge cases in git config files (e.g., URLs with spaces or special characters). Have you considered using a more robust git config parser library, or should we add more comprehensive error handling for edge cases?

if (urlMatch && urlMatch[1]) {
const url = urlMatch[1].trim()
return { repositoryUrl: url }
}
}
} catch (error) {
// Ignore errors and fall back to workspace-based naming
console.warn(`[QdrantVectorStore] Could not read git repository info:`, error)
}
return undefined
}

/**
* Normalizes a git URL for consistent hashing
* @param url Git URL to normalize
* @returns Normalized URL
*/
private normalizeGitUrl(url: string): string {
try {
// Remove credentials from HTTPS URLs
let normalized = url
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The credential removal logic here is good, but consider adding a comment explaining why this is important for security (preventing credential leakage in collection names). This helps future maintainers understand the security implications.

if (url.startsWith("https://") || url.startsWith("http://")) {
try {
const urlObj = new URL(url)
urlObj.username = ""
urlObj.password = ""
normalized = urlObj.toString()
} catch {
// If URL parsing fails, just remove obvious credentials
normalized = url.replace(/^https?:\/\/[^@]+@/, "https://")
}
}

// Convert SSH to HTTPS format for consistency
if (normalized.startsWith("git@")) {
normalized = normalized.replace(/^git@([^:]+):/, "https://$1/")
} else if (normalized.startsWith("ssh://")) {
normalized = normalized.replace(/^ssh:\/\/(?:git@)?([^\/]+)\//, "https://$1/")
}

// Remove .git suffix
normalized = normalized.replace(/\.git$/, "")

// Convert to lowercase for consistency
normalized = normalized.toLowerCase()

return normalized
} catch (error) {
// If normalization fails, return the original URL
console.warn(`[QdrantVectorStore] Could not normalize git URL:`, error)
return url.toLowerCase()
}
}

/**
* Sanitizes a collection name to ensure it's valid for Qdrant
* @param name Collection name to sanitize
* @returns Sanitized collection name
*/
private sanitizeCollectionName(name: string): string {
// Qdrant collection names must be alphanumeric with underscores or hyphens
// Max length is typically 255 characters
return name
.toLowerCase()
.replace(/[^a-z0-9_-]/g, "-")
.replace(/^-+|-+$/g, "") // Remove leading/trailing hyphens
.substring(0, 255)
}

/**
Expand Down
Loading