Skip to content

Commit cbaf635

Browse files
committed
feat(context): add git repository identifier support for consistent collection naming
1 parent f50bb47 commit cbaf635

File tree

3 files changed

+85
-22
lines changed

3 files changed

+85
-22
lines changed

packages/core/src/context.ts

Lines changed: 36 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -213,8 +213,8 @@ export class Context {
213213
/**
214214
* Public wrapper around the private prepareCollection method (forceReindex is always false here)
215215
*/
216-
async getPreparedCollection(codebasePath: string): Promise<void> {
217-
return this.prepareCollection(codebasePath);
216+
async getPreparedCollection(codebasePath: string, gitRepoIdentifier?: string | null): Promise<void> {
217+
return this.prepareCollection(codebasePath, false, gitRepoIdentifier);
218218
}
219219

220220
/**
@@ -230,12 +230,31 @@ export class Context {
230230

231231
/**
232232
* Generate the collection name for a codebase: a prefix chosen by hybrid mode plus the first 8 hex characters of an MD5 hash of the resolved codebase path
233+
* If a git repository identifier is supplied, the name is derived from it instead of the path, so the same repository gets the same collection name regardless of where it is checked out locally
233234
*/
234-
public getCollectionName(codebasePath: string): string {
235+
public getCollectionName(codebasePath: string, gitRepoIdentifier?: string | null): string {
235236
const isHybrid = this.getIsHybrid();
237+
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
238+
239+
// If git repository identifier is provided, use it for collection naming
240+
if (gitRepoIdentifier) {
241+
// Create a clean identifier by replacing special characters
242+
const cleanIdentifier = gitRepoIdentifier
243+
.replace(/[^a-zA-Z0-9]/g, '_') // Replace non-alphanumeric with underscore
244+
.toLowerCase()
245+
.substring(0, 32); // Limit length for collection name
246+
247+
// Hash the original (unsanitized) identifier so the name stays unique even if sanitizing or truncating makes two identifiers look the same
248+
const hash = crypto.createHash('md5').update(gitRepoIdentifier).digest('hex');
249+
250+
console.log(`[Context] Using git-based collection naming for: ${gitRepoIdentifier}`);
251+
return `${prefix}_git_${cleanIdentifier}_${hash.substring(0, 8)}`;
252+
}
253+
254+
// Fall back to path-based naming (original behavior)
236255
const normalizedPath = path.resolve(codebasePath);
237256
const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
238-
const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
257+
console.log(`[Context] Using path-based collection naming for: ${normalizedPath}`);
239258
return `${prefix}_${hash.substring(0, 8)}`;
240259
}
241260

@@ -405,13 +424,15 @@ export class Context {
405424
* @param query Search query
406425
* @param topK Number of results to return
407426
* @param threshold Similarity threshold
427+
* @param filterExpr Optional filter expression
428+
* @param gitRepoIdentifier Optional git repository identifier for consistent collection naming
408429
*/
409-
async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5, filterExpr?: string): Promise<SemanticSearchResult[]> {
430+
async semanticSearch(codebasePath: string, query: string, topK: number = 5, threshold: number = 0.5, filterExpr?: string, gitRepoIdentifier?: string | null): Promise<SemanticSearchResult[]> {
410431
const isHybrid = this.getIsHybrid();
411432
const searchType = isHybrid === true ? 'hybrid search' : 'semantic search';
412433
console.log(`[Context] 🔍 Executing ${searchType}: "${query}" in ${codebasePath}`);
413434

414-
const collectionName = this.getCollectionName(codebasePath);
435+
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);
415436
console.log(`[Context] 🔍 Using collection: ${collectionName}`);
416437

417438
// Check if collection exists and has data
@@ -518,27 +539,30 @@ export class Context {
518539
/**
519540
* Check if index exists for codebase
520541
* @param codebasePath Codebase path to check
542+
* @param gitRepoIdentifier Optional git repository identifier for consistent collection naming
521543
* @returns Whether index exists
522544
*/
523-
async hasIndex(codebasePath: string): Promise<boolean> {
524-
const collectionName = this.getCollectionName(codebasePath);
545+
async hasIndex(codebasePath: string, gitRepoIdentifier?: string | null): Promise<boolean> {
546+
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);
525547
return await this.vectorDatabase.hasCollection(collectionName);
526548
}
527549

528550
/**
529551
* Clear index
530552
* @param codebasePath Codebase path to clear index for
531553
* @param progressCallback Optional progress callback function
554+
* @param gitRepoIdentifier Optional git repository identifier for consistent collection naming
532555
*/
533556
async clearIndex(
534557
codebasePath: string,
535-
progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void
558+
progressCallback?: (progress: { phase: string; current: number; total: number; percentage: number }) => void,
559+
gitRepoIdentifier?: string | null
536560
): Promise<void> {
537561
console.log(`[Context] 🧹 Cleaning index data for ${codebasePath}...`);
538562

539563
progressCallback?.({ phase: 'Checking existing index...', current: 0, total: 100, percentage: 0 });
540564

541-
const collectionName = this.getCollectionName(codebasePath);
565+
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);
542566
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);
543567

544568
progressCallback?.({ phase: 'Removing index data...', current: 50, total: 100, percentage: 50 });
@@ -622,11 +646,11 @@ export class Context {
622646
/**
623647
* Prepare vector collection
624648
*/
625-
private async prepareCollection(codebasePath: string, forceReindex: boolean = false): Promise<void> {
649+
private async prepareCollection(codebasePath: string, forceReindex: boolean = false, gitRepoIdentifier?: string | null): Promise<void> {
626650
const isHybrid = this.getIsHybrid();
627651
const collectionType = isHybrid === true ? 'hybrid vector' : 'vector';
628652
console.log(`[Context] 🔧 Preparing ${collectionType} collection for codebase: ${codebasePath}${forceReindex ? ' (FORCE REINDEX)' : ''}`);
629-
const collectionName = this.getCollectionName(codebasePath);
653+
const collectionName = this.getCollectionName(codebasePath, gitRepoIdentifier);
630654

631655
// Check if collection already exists
632656
const collectionExists = await this.vectorDatabase.hasCollection(collectionName);

packages/mcp/src/handlers.ts

Lines changed: 40 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,12 @@ import * as path from "path";
33
import * as crypto from "crypto";
44
import { Context, COLLECTION_LIMIT_MESSAGE } from "@zilliz/claude-context-core";
55
import { SnapshotManager } from "./snapshot.js";
6-
import { ensureAbsolutePath, truncateContent, trackCodebasePath } from "./utils.js";
6+
import {
7+
ensureAbsolutePath,
8+
truncateContent,
9+
trackCodebasePath,
10+
getRepositoryIdentifier
11+
} from "./utils.js";
712

813
export class ToolHandlers {
914
private context: Context;
@@ -199,8 +204,14 @@ export class ToolHandlers {
199204
};
200205
}
201206

207+
// Get git repository identifier for consistent collection naming
208+
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
209+
if (gitRepoIdentifier) {
210+
console.log(`[INDEX-VALIDATION] 🔗 Git repository detected: ${gitRepoIdentifier}`);
211+
}
212+
202213
// Check whether the snapshot and the cloud index are in sync
203-
if (this.snapshotManager.getIndexedCodebases().includes(absolutePath) !== await this.context.hasIndex(absolutePath)) {
214+
if (this.snapshotManager.getIndexedCodebases().includes(absolutePath) !== await this.context.hasIndex(absolutePath, gitRepoIdentifier)) {
204215
console.warn(`[INDEX-VALIDATION] ❌ Snapshot and cloud index mismatch: ${absolutePath}`);
205216
}
206217

@@ -221,9 +232,9 @@ export class ToolHandlers {
221232
console.log(`[FORCE-REINDEX] 🔄 Removing '${absolutePath}' from indexed list for re-indexing`);
222233
this.snapshotManager.removeIndexedCodebase(absolutePath);
223234
}
224-
if (await this.context.hasIndex(absolutePath)) {
235+
if (await this.context.hasIndex(absolutePath, gitRepoIdentifier)) {
225236
console.log(`[FORCE-REINDEX] 🔄 Clearing index for '${absolutePath}'`);
226-
await this.context.clearIndex(absolutePath);
237+
await this.context.clearIndex(absolutePath, undefined, gitRepoIdentifier);
227238
}
228239
}
229240

@@ -339,6 +350,14 @@ export class ToolHandlers {
339350
console.warn(`[BACKGROUND-INDEX] Non-AST splitter '${splitterType}' requested; falling back to AST splitter`);
340351
}
341352

353+
// Get git repository identifier if available
354+
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
355+
if (gitRepoIdentifier) {
356+
console.log(`[BACKGROUND-INDEX] 🔗 Git repository detected: ${gitRepoIdentifier}`);
357+
} else {
358+
console.log(`[BACKGROUND-INDEX] 📁 Using path-based identification (not a git repository or no remote)`);
359+
}
360+
342361
// Load ignore patterns from files first (including .ignore, .gitignore, etc.)
343362
await this.context.getLoadedIgnorePatterns(absolutePath);
344363

@@ -350,8 +369,8 @@ export class ToolHandlers {
350369
await synchronizer.initialize();
351370

352371
// Store synchronizer in the context (let context manage collection names)
353-
await this.context.getPreparedCollection(absolutePath);
354-
const collectionName = this.context.getCollectionName(absolutePath);
372+
await this.context.getPreparedCollection(absolutePath, gitRepoIdentifier);
373+
const collectionName = this.context.getCollectionName(absolutePath, gitRepoIdentifier);
355374
this.context.setSynchronizer(collectionName, synchronizer);
356375
if (contextForThisTask !== this.context) {
357376
contextForThisTask.setSynchronizer(collectionName, synchronizer);
@@ -447,6 +466,12 @@ export class ToolHandlers {
447466

448467
trackCodebasePath(absolutePath);
449468

469+
// Get git repository identifier if available for consistent collection naming
470+
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
471+
if (gitRepoIdentifier) {
472+
console.log(`[SEARCH] 🔗 Git repository detected: ${gitRepoIdentifier}`);
473+
}
474+
450475
// Check if this codebase is indexed or being indexed
451476
const isIndexed = this.snapshotManager.getIndexedCodebases().includes(absolutePath);
452477
const isIndexing = this.snapshotManager.getIndexingCodebases().includes(absolutePath);
@@ -500,7 +525,8 @@ export class ToolHandlers {
500525
query,
501526
Math.min(resultLimit, 50),
502527
0.3,
503-
filterExpr
528+
filterExpr,
529+
gitRepoIdentifier
504530
);
505531

506532
console.log(`[SEARCH] ✅ Search completed! Found ${searchResults.length} results using ${embeddingProvider.getProvider()} embeddings`);
@@ -621,10 +647,16 @@ export class ToolHandlers {
621647
};
622648
}
623649

650+
// Get git repository identifier for consistent collection naming
651+
const gitRepoIdentifier = getRepositoryIdentifier(absolutePath);
652+
if (gitRepoIdentifier) {
653+
console.log(`[CLEAR] 🔗 Git repository detected: ${gitRepoIdentifier}`);
654+
}
655+
624656
console.log(`[CLEAR] Clearing codebase: ${absolutePath}`);
625657

626658
try {
627-
await this.context.clearIndex(absolutePath);
659+
await this.context.clearIndex(absolutePath, undefined, gitRepoIdentifier);
628660
console.log(`[CLEAR] Successfully cleared index for: ${absolutePath}`);
629661
} catch (error: any) {
630662
const errorMsg = `Failed to clear ${absolutePath}: ${error.message}`;

packages/mcp/src/index.ts

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,12 @@ Index a codebase directory to enable semantic search using a configurable code s
8888
8989
⚠️ **IMPORTANT**:
9090
- You MUST provide an absolute path to the target codebase.
91+
- If the path is a git repository with a remote URL, it will automatically use the git remote for consistent collection naming across different local paths.
92+
93+
🔗 **Git Repository Support**:
94+
- Automatically detects git repositories and uses remote URL for collection naming
95+
- Same repository cloned to different paths will share the same collection
96+
- Ensures consistency across team members and machines
9197
9298
✨ **Usage Guidance**:
9399
- This tool is typically used when search fails due to an unindexed codebase.
@@ -100,6 +106,7 @@ Search the indexed codebase using natural language queries within a specified ab
100106
101107
⚠️ **IMPORTANT**:
102108
- You MUST provide an absolute path.
109+
- If the path is a git repository, it will automatically use the correct collection based on the git remote URL.
103110
104111
🎯 **When to Use**:
105112
This tool is versatile and can be used before completing various tasks to retrieve relevant context:
@@ -195,7 +202,7 @@ This tool is versatile and can be used before completing various tasks to retrie
195202
},
196203
{
197204
name: "clear_index",
198-
description: `Clear the search index. IMPORTANT: You MUST provide an absolute path.`,
205+
description: `Clear the search index. IMPORTANT: You MUST provide an absolute path. Git repositories will be identified by their remote URL for accurate clearing.`,
199206
inputSchema: {
200207
type: "object",
201208
properties: {
@@ -209,7 +216,7 @@ This tool is versatile and can be used before completing various tasks to retrie
209216
},
210217
{
211218
name: "get_indexing_status",
212-
description: `Get the current indexing status of a codebase. Shows progress percentage for actively indexing codebases and completion status for indexed codebases.`,
219+
description: `Get the current indexing status of a codebase. Shows progress percentage for actively indexing codebases and completion status for indexed codebases. Git repositories are identified by their remote URL.`,
213220
inputSchema: {
214221
type: "object",
215222
properties: {

0 commit comments

Comments
 (0)