Skip to content

Commit 351ca02

Browse files
committed
feat: add deterministic naming for Qdrant collections
- Add support for custom collection names via .roo/codebase-index.json
- Use git repository URL for deterministic collection naming across worktrees
- Fall back to workspace path hash when no git repo is available
- Normalize git URLs for consistent hashing
- Add comprehensive tests for new naming strategies

Fixes #7940
1 parent c4c4780 commit 351ca02

File tree

2 files changed

+224
-3
lines changed

2 files changed

+224
-3
lines changed

src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { QdrantClient } from "@qdrant/js-client-rest"
22
import { createHash } from "crypto"
3+
import * as fs from "fs"
34

45
import { QdrantVectorStore } from "../qdrant-client"
56
import { getWorkspacePath } from "../../../../utils/path"
@@ -8,6 +9,7 @@ import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../../cons
89
// Mocks
910
vitest.mock("@qdrant/js-client-rest")
1011
vitest.mock("crypto")
12+
vitest.mock("fs")
1113
vitest.mock("../../../../utils/path")
1214
vitest.mock("../../../../i18n", () => ({
1315
t: (key: string, params?: any) => {
@@ -68,6 +70,10 @@ describe("QdrantVectorStore", () => {
6870
// Mock getWorkspacePath
6971
;(getWorkspacePath as any).mockReturnValue(mockWorkspacePath)
7072

73+
// Mock fs functions - default to no git repo and no config file
74+
;(fs.existsSync as any).mockReturnValue(false)
75+
;(fs.readFileSync as any).mockReturnValue("")
76+
7177
vectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)
7278
})
7379

@@ -82,13 +88,101 @@ describe("QdrantVectorStore", () => {
8288
"User-Agent": "Roo-Code",
8389
},
8490
})
91+
// When no git repo or custom config, should fall back to workspace-based hash
8592
expect(createHash).toHaveBeenCalledWith("sha256")
8693
expect(mockCreateHashInstance.update).toHaveBeenCalledWith(mockWorkspacePath)
8794
expect(mockCreateHashInstance.digest).toHaveBeenCalledWith("hex")
8895
// Access private member for testing constructor logic (not ideal, but necessary here)
8996
expect((vectorStore as any).collectionName).toBe(expectedCollectionName)
9097
expect((vectorStore as any).vectorSize).toBe(mockVectorSize)
9198
})
99+
100+
it("should use custom collection name from .roo/codebase-index.json if available", () => {
	// Only the project-level config file "exists"; its JSON carries the desired name.
	const configJson = JSON.stringify({ collectionName: "my-custom-collection" })
	;(fs.existsSync as any).mockImplementation((path: string) => path.includes(".roo/codebase-index.json"))
	;(fs.readFileSync as any).mockReturnValue(configJson)

	const storeWithCustomName = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

	// The configured name is already lowercase alphanumerics and hyphens, so it is expected unchanged.
	expect((storeWithCustomName as any).collectionName).toBe("my-custom-collection")
})
116+
117+
it("should use git repository URL for deterministic naming when available", () => {
	// Simulate a workspace that has a .git directory but no .roo config file
	;(fs.existsSync as any).mockImplementation((path: string) => {
		return path.includes(".git")
	})
	// The git config declares an SSH-style (scp-like) origin remote
	;(fs.readFileSync as any).mockImplementation((path: string) => {
		if (path.includes(".git/config")) {
			return `[remote "origin"]\n\turl = git@github.com:user/repo.git`
		}
		return ""
	})

	// Mock createHash for the git URL
	const gitUrlHash = "gitrepo1234567890abcdef"
	mockCreateHashInstance.digest.mockReturnValueOnce(gitUrlHash)

	const gitVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

	// Should use repo- prefix with the first 16 characters of the git URL hash
	expect((gitVectorStore as any).collectionName).toBe(`repo-${gitUrlHash.substring(0, 16)}`)

	// Verify it hashed the normalized (HTTPS, no .git suffix) form of the remote URL
	expect(mockCreateHashInstance.update).toHaveBeenCalledWith("https://github.com/user/repo")
})
142+
143+
it("should normalize different git URL formats consistently", () => {
	// Every spelling of the same repository must hash the same normalized URL
	const testCases = [
		// scp-like SSH syntax
		{ input: "git@github.com:user/repo.git", expected: "https://github.com/user/repo" },
		// plain HTTPS
		{ input: "https://github.com/user/repo.git", expected: "https://github.com/user/repo" },
		// explicit ssh:// scheme
		{ input: "ssh://git@github.com/user/repo.git", expected: "https://github.com/user/repo" },
		// HTTPS with embedded credentials
		{ input: "https://user@github.com/user/repo.git", expected: "https://github.com/user/repo" },
	]

	testCases.forEach(({ input, expected }) => {
		// Drop calls recorded by earlier iterations so each assertion only sees its own construction
		vitest.clearAllMocks()
		;(fs.existsSync as any).mockImplementation((path: string) => path.includes(".git"))
		;(fs.readFileSync as any).mockImplementation((path: string) => {
			if (path.includes(".git/config")) {
				return `[remote "origin"]\n\turl = ${input}`
			}
			return ""
		})

		const gitUrlHash = "normalized1234567890abcdef"
		mockCreateHashInstance.digest.mockReturnValueOnce(gitUrlHash)

		const store = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

		// Verify it hashed the normalized URL
		expect(mockCreateHashInstance.update).toHaveBeenCalledWith(expected)
		// ...and that the resulting name is derived from that hash
		expect((store as any).collectionName).toBe(`repo-${gitUrlHash.substring(0, 16)}`)
	})
})
170+
171+
it("should sanitize custom collection names to be Qdrant-compatible", () => {
	// Config file exists and supplies a name with uppercase letters, spaces and punctuation.
	const configJson = JSON.stringify({ collectionName: "My Custom Collection!@#$%" })
	;(fs.existsSync as any).mockImplementation((path: string) => path.includes(".roo/codebase-index.json"))
	;(fs.readFileSync as any).mockReturnValue(configJson)

	const storeWithMessyName = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, mockVectorSize, mockApiKey)

	// Lowercased, invalid characters replaced by hyphens, trailing hyphens stripped.
	expect((storeWithMessyName as any).collectionName).toBe("my-custom-collection")
})
92186
it("should handle constructor with default URL when none provided", () => {
93187
const vectorStoreWithDefaults = new QdrantVectorStore(mockWorkspacePath, undefined as any, mockVectorSize)
94188

src/services/code-index/vector-store/qdrant-client.ts

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import { QdrantClient, Schemas } from "@qdrant/js-client-rest"
22
import { createHash } from "crypto"
33
import * as path from "path"
4+
import * as fs from "fs"
45
import { getWorkspacePath } from "../../../utils/path"
56
import { IVectorStore } from "../interfaces/vector-store"
67
import { Payload, VectorStoreSearchResult } from "../interfaces"
78
import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../constants"
89
import { t } from "../../../i18n"
10+
import { getGitRepositoryInfo } from "../../../utils/git"
911

1012
/**
1113
* Qdrant implementation of the vector store interface
@@ -77,10 +79,135 @@ export class QdrantVectorStore implements IVectorStore {
7779
})
7880
}
7981

80-
// Generate collection name from workspace path
81-
const hash = createHash("sha256").update(workspacePath).digest("hex")
82+
// Generate deterministic collection name
8283
this.vectorSize = vectorSize
83-
this.collectionName = `ws-${hash.substring(0, 16)}`
84+
this.collectionName = this.generateCollectionName(workspacePath)
85+
}
86+
87+
/**
 * Generates a deterministic collection name for the workspace.
 *
 * Sources are tried in order:
 * 1. A custom name from `.roo/codebase-index.json`, sanitized.
 * 2. `repo-<hash>` derived from the normalized git remote URL.
 * 3. `ws-<hash>` derived from the workspace path.
 *
 * `<hash>` is the first 16 hex characters of a SHA-256 digest.
 *
 * @param workspacePath Path to the workspace
 * @returns Collection name (never empty)
 */
private generateCollectionName(workspacePath: string): string {
	const customName = this.loadCustomCollectionName(workspacePath)
	if (customName) {
		const sanitized = this.sanitizeCollectionName(customName)
		if (sanitized) {
			return sanitized
		}
		// A name made only of invalid characters sanitizes to "", which is not a
		// usable collection name; fall through to the automatic strategies instead.
		console.warn(
			`[QdrantVectorStore] Custom collection name in .roo/codebase-index.json is empty after sanitization; falling back to automatic naming`,
		)
	}

	// Prefer the repository URL so worktrees and clones of one repo share a name;
	// otherwise fall back to hashing the workspace path.
	const repositoryUrl = this.getGitInfoSync(workspacePath)?.repositoryUrl
	const prefix = repositoryUrl ? "repo" : "ws"
	const seed = repositoryUrl ? repositoryUrl : workspacePath

	const hash = createHash("sha256").update(seed).digest("hex")
	return `${prefix}-${hash.substring(0, 16)}`
}
113+
114+
/**
 * Loads a custom collection name from `<workspace>/.roo/codebase-index.json`.
 *
 * The file is optional. A missing file, a JSON value that is not an object,
 * or a `collectionName` that is not a non-blank string all yield `undefined`.
 * Read or parse failures are logged as warnings and also yield `undefined`.
 *
 * @param workspacePath Path to the workspace
 * @returns The configured name exactly as written in the file, or undefined
 */
private loadCustomCollectionName(workspacePath: string): string | undefined {
	const configPath = path.join(workspacePath, ".roo", "codebase-index.json")
	try {
		if (!fs.existsSync(configPath)) {
			return undefined
		}

		// JSON.parse can return any JSON value (null, array, number, ...); narrow
		// before touching properties so a non-object file is not reported as an error.
		const config: unknown = JSON.parse(fs.readFileSync(configPath, "utf8"))
		if (typeof config !== "object" || config === null) {
			return undefined
		}

		const { collectionName } = config as { collectionName?: unknown }
		if (typeof collectionName === "string" && collectionName.trim() !== "") {
			return collectionName
		}
	} catch (error) {
		// Best effort: a broken config file must not prevent indexing
		console.warn(
			`[QdrantVectorStore] Could not read custom collection name from .roo/codebase-index.json:`,
			error,
		)
	}
	return undefined
}
137+
138+
/**
 * Synchronously reads the repository's remote URL from its git config.
 *
 * The `origin` remote is preferred; if there is none, the first remote that
 * declares a `url` is used. The URL is normalized before being returned.
 * Any failure is logged as a warning and reported as `undefined`.
 *
 * @param workspacePath Path to the workspace
 * @returns Git repository info, or undefined when no remote URL can be determined
 */
private getGitInfoSync(workspacePath: string): { repositoryUrl?: string } | undefined {
	try {
		const configPath = this.resolveGitConfigPath(workspacePath)
		if (!configPath) {
			return undefined
		}

		const remoteUrl = this.extractRemoteUrl(fs.readFileSync(configPath, "utf8"))
		if (remoteUrl) {
			return { repositoryUrl: this.normalizeGitUrl(remoteUrl) }
		}
	} catch (error) {
		// Fall back to workspace-based naming
		console.warn(`[QdrantVectorStore] Could not read git repository info:`, error)
	}
	return undefined
}

/**
 * Locates the git config file that holds the remotes for a workspace.
 *
 * Handles a regular checkout (`.git/config`) as well as checkouts where `.git`
 * is a file containing `gitdir: <path>` (linked worktrees, submodules). For a
 * linked worktree the `commondir` file inside that git dir points at the main
 * repository's git dir, which is where the shared config lives.
 *
 * @param workspacePath Path to the workspace
 * @returns Path to the config file, or undefined if none is found
 */
private resolveGitConfigPath(workspacePath: string): string | undefined {
	const dotGit = path.join(workspacePath, ".git")
	if (!fs.existsSync(dotGit)) {
		return undefined
	}

	// Regular checkout: .git is a directory containing the config
	const directConfig = path.join(dotGit, "config")
	if (fs.existsSync(directConfig)) {
		return directConfig
	}

	// Otherwise .git may be a pointer file; reading a directory throws, which
	// simply means there is nothing to follow.
	let pointer: string
	try {
		pointer = fs.readFileSync(dotGit, "utf8")
	} catch {
		return undefined
	}

	const gitDirRaw = /^gitdir:\s*(.+)$/m.exec(pointer)?.[1]?.trim()
	if (!gitDirRaw) {
		return undefined
	}
	const gitDir = path.resolve(workspacePath, gitDirRaw)

	// Linked worktrees store a (usually relative) path to the shared git dir
	let commonDir = gitDir
	const commonDirFile = path.join(gitDir, "commondir")
	if (fs.existsSync(commonDirFile)) {
		const commonDirRaw = fs.readFileSync(commonDirFile, "utf8").trim()
		if (commonDirRaw) {
			commonDir = path.resolve(gitDir, commonDirRaw)
		}
	}

	const sharedConfig = path.join(commonDir, "config")
	return fs.existsSync(sharedConfig) ? sharedConfig : undefined
}

/**
 * Extracts a remote URL from git config text.
 *
 * Only `url` keys inside `[remote "<name>"]` sections are considered, so
 * submodule URLs, `[url ...]` rewrite sections and `pushurl` keys are ignored.
 *
 * @param configContent Raw contents of a git config file
 * @returns URL of `origin` if present, else of the first remote with a URL, else undefined
 */
private extractRemoteUrl(configContent: string): string | undefined {
	let currentRemote: string | undefined
	let firstRemoteUrl: string | undefined

	for (const rawLine of configContent.split(/\r?\n/)) {
		const line = rawLine.trim()

		if (line.startsWith("[")) {
			// Entering a new section; remember its name only if it is a remote
			currentRemote = /^\[remote\s+"([^"]+)"\s*\]/.exec(line)?.[1]
			continue
		}
		if (currentRemote === undefined) {
			continue
		}

		const value = /^url\s*=\s*(.+)$/i.exec(line)?.[1]?.trim()
		if (!value) {
			continue
		}
		if (currentRemote === "origin") {
			return value
		}
		if (firstRemoteUrl === undefined) {
			firstRemoteUrl = value
		}
	}

	return firstRemoteUrl
}
172+
173+
/**
 * Normalizes a git remote URL so that different spellings of the same
 * repository produce the same string for hashing.
 *
 * - `scheme://[user[:pass]@]host[:port]/path` becomes `https://host/path`.
 *   The port is kept only for http(s) URLs, since an SSH port has no meaning
 *   in the HTTPS form.
 * - scp-like `[user@]host:path` becomes `https://host/path`.
 * - Trailing slashes and a trailing `.git` are removed.
 * - The result is lowercased.
 *
 * Inputs that match neither form (local paths, `file://` URLs) only get the
 * suffix and case normalization. The result is a hashing key, not necessarily
 * a fetchable URL.
 *
 * @param url Git URL to normalize
 * @returns Normalized URL
 */
private normalizeGitUrl(url: string): string {
	let normalized = url.trim()

	// Userinfo is restricted to the authority part ([^@/]*) so an "@" that
	// appears later in the path is never mistaken for credentials.
	const withScheme = /^([a-z][a-z0-9+.-]*):\/\/(?:[^@/]*@)?([^/:@]+)(:\d+)?(\/.*)?$/i.exec(normalized)
	if (withScheme) {
		const scheme = withScheme[1] ?? ""
		const host = withScheme[2] ?? ""
		const port = /^https?$/i.test(scheme) ? (withScheme[3] ?? "") : ""
		const rest = withScheme[4] ?? ""
		normalized = `https://${host}${port}${rest}`
	} else {
		// scp-like syntax. The host must be at least two characters so a Windows
		// drive letter ("C:\repo") is not treated as a host name.
		const scpLike = /^(?:[^@/\s]+@)?([^:/\\\s@]{2,}):(?!\/\/)(.+)$/.exec(normalized)
		if (scpLike) {
			const host = scpLike[1] ?? ""
			const repoPath = (scpLike[2] ?? "").replace(/^\/+/, "")
			normalized = `https://${host}/${repoPath}`
		}
	}

	// Strip trailing slashes first so "repo.git/" also loses its suffix
	normalized = normalized.replace(/\/+$/, "").replace(/\.git$/i, "")

	return normalized.toLowerCase()
}
197+
198+
/**
 * Reduces an arbitrary string to a conservative collection-name alphabet.
 *
 * The name is lowercased, every character outside `a-z`, `0-9`, `_` and `-`
 * becomes a hyphen, leading and trailing hyphens are dropped, and the result
 * is cut to at most 255 characters. The result can be an empty string.
 *
 * NOTE(review): the alphabet and the 255-character cap are assumed to satisfy
 * Qdrant's naming rules; confirm against the Qdrant documentation.
 *
 * @param name Collection name to sanitize
 * @returns Sanitized collection name
 */
private sanitizeCollectionName(name: string): string {
	const lowered = name.toLowerCase()
	const hyphenated = lowered.replace(/[^a-z0-9_-]/g, "-")
	const withoutEdgeHyphens = hyphenated.replace(/^-+|-+$/g, "")
	return withoutEdgeHyphens.substring(0, 255)
}
85212

86213
/**

0 commit comments

Comments
 (0)