diff --git a/src/core/prompts/sections/__tests__/custom-instructions.spec.ts b/src/core/prompts/sections/__tests__/custom-instructions.spec.ts index 49904465ab..60c5b654ad 100644 --- a/src/core/prompts/sections/__tests__/custom-instructions.spec.ts +++ b/src/core/prompts/sections/__tests__/custom-instructions.spec.ts @@ -1605,3 +1605,120 @@ describe("Rules directory reading", () => { expect(result).toBe("\n# Rules from .roorules:\nfallback content\n") }) }) + +describe("File deduplication", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + test("should skip files with duplicate content (same content hash)", async () => { + // Mock directory structure with different files having identical content + const mockEntries = [ + { + name: "file1.md", + isFile: () => true, + isSymbolicLink: () => false, + isDirectory: () => false, + parentPath: "/fake/path/.roo/rules", + }, + { + name: "file2.md", + isFile: () => true, + isSymbolicLink: () => false, + isDirectory: () => false, + parentPath: "/fake/path/.roo/rules", + }, + { + name: "file3.md", + isFile: () => true, + isSymbolicLink: () => false, + isDirectory: () => false, + parentPath: "/fake/path/.roo/rules", + }, + ] + + // Mock fs operations + statMock.mockResolvedValueOnce({ isDirectory: () => true }) // .roo/rules exists + readdirMock.mockResolvedValueOnce(mockEntries) + + // Mock file stats for each file + statMock.mockResolvedValueOnce({ isFile: () => true }) // file1.md + statMock.mockResolvedValueOnce({ isFile: () => true }) // file2.md + statMock.mockResolvedValueOnce({ isFile: () => true }) // file3.md + + // Mock file content reading - file2 and file3 have identical content + readFileMock.mockResolvedValueOnce("unique content") // file1.md + readFileMock.mockResolvedValueOnce("duplicate content") // file2.md + readFileMock.mockResolvedValueOnce("duplicate content") // file3.md (should be skipped due to duplicate hash) + + const result = await loadRuleFiles("/fake/path") + + // Should include both unique contents, but duplicate content only once + expect(result).toContain("unique content") + expect(result).toContain("duplicate content") + + // Count occurrences of "duplicate content" - should only appear once + const duplicateContentMatches = (result.match(/duplicate content/g) || []).length + expect(duplicateContentMatches).toBe(1) + }) + + test("should skip files with duplicate resolved paths via symlinks", async () => { + // Mock directory structure with symlinks pointing to the same file + const mockEntries = [ + { + name: "original.md", + isFile: () => true, + isSymbolicLink: () => false, + isDirectory: () => false, + parentPath: "/fake/path/.roo/rules", + }, + { + name: "symlink1.md", + isFile: () => false, + isSymbolicLink: () => true, + isDirectory: () => false, + parentPath: "/fake/path/.roo/rules", + }, + { + name: "symlink2.md", + isFile: () => false, + isSymbolicLink: () => true, + isDirectory: () => false, + parentPath: "/fake/path/.roo/rules", + }, + ] + + // Mock fs operations + statMock.mockResolvedValueOnce({ isDirectory: () => true }) // .roo/rules exists + readdirMock.mockResolvedValueOnce(mockEntries) + + // Mock file stats + statMock.mockResolvedValueOnce({ isFile: () => true }) // original.md + + // Mock symlink resolution - both symlinks point to original.md + readlinkMock.mockResolvedValueOnce("original.md") // symlink1 -> original.md + statMock.mockResolvedValueOnce({ isFile: () => true }) // symlink1 target + + readlinkMock.mockResolvedValueOnce("original.md") // symlink2 -> original.md + statMock.mockResolvedValueOnce({ isFile: () => true }) // symlink2 target + + // Mock file content reading + readFileMock.mockResolvedValueOnce("original content") // original.md + readFileMock.mockResolvedValueOnce("original content") // symlink1 target (should be skipped - same resolved path) + readFileMock.mockResolvedValueOnce("original content") // symlink2 target (should be skipped - same resolved path) + + const result = await loadRuleFiles("/fake/path") + + // Should only include original content once + expect(result).toContain("original content") + + // Count occurrences of "original content" - should only appear once + const originalContentMatches = (result.match(/original content/g) || []).length + expect(originalContentMatches).toBe(1) + }) + + // Note: The deduplication functionality is tested by the two tests above. + // Additional integration testing would require more complex mock setup + // that may be brittle. The core functionality is verified by the + // content hash and resolved path deduplication tests. +}) diff --git a/src/core/prompts/sections/custom-instructions.ts b/src/core/prompts/sections/custom-instructions.ts index 2d70e45419..33eac65444 100644 --- a/src/core/prompts/sections/custom-instructions.ts +++ b/src/core/prompts/sections/custom-instructions.ts @@ -10,6 +10,49 @@ import type { SystemPromptSettings } from "../types" import { LANGUAGES } from "../../../shared/language" import { getRooDirectoriesForCwd, getGlobalRooDirectory } from "../../../services/roo-config" +/** + * File tracking system to prevent duplicate inclusions + */ +interface FileTracker { + resolvedPaths: Set + contentHashes: Set +} + +/** + * Create content hash using FNV-1a algorithm (non-cryptographic, fast, cross-platform) + */ +function createContentHash(content: string): string { + let hash = 2166136261 // FNV offset basis (32-bit) + + for (let i = 0; i < content.length; i++) { + hash ^= content.charCodeAt(i) + hash = Math.imul(hash, 16777619) // FNV prime (32-bit) + } + + return (hash >>> 0).toString(16) // Convert to unsigned 32-bit hex +} + +/** + * Check if a file should be included based on resolved path and content hash + */ +function shouldIncludeFile(resolvedPath: string, content: string, tracker: FileTracker): boolean { + // Skip if we've already included this resolved path + if (tracker.resolvedPaths.has(resolvedPath)) { + return false + } + + // Skip if we've already included content with this hash + const contentHash = createContentHash(content) + if (tracker.contentHashes.has(contentHash)) { + return false + } + + // Add to tracking sets + tracker.resolvedPaths.add(resolvedPath) + tracker.contentHashes.add(contentHash) + return true +} + /** * Safely read a file and return its trimmed content */ @@ -108,7 +151,10 @@ async function resolveSymLink( /** * Read all text files from a directory in alphabetical order */ -async function readTextFilesFromDirectory(dirPath: string): Promise> { +async function readTextFilesFromDirectory( + dirPath: string, + tracker: FileTracker, +): Promise> { try { const entries = await fs.readdir(dirPath, { withFileTypes: true, recursive: true }) @@ -136,6 +182,12 @@ async function readTextFilesFromDirectory(dirPath: string): Promise { +export async function loadRuleFiles(cwd: string, tracker?: FileTracker): Promise { + // Create a default tracker if none provided (for backward compatibility with tests) + const fileTracker = tracker || { + resolvedPaths: new Set(), + contentHashes: new Set(), + } + const rules: string[] = [] const rooDirectories = getRooDirectoriesForCwd(cwd) @@ -190,7 +248,7 @@ export async function loadRuleFiles(cwd: string): Promise { for (const rooDir of rooDirectories) { const rulesDir = path.join(rooDir, "rules") if (await directoryExists(rulesDir)) { - const files = await readTextFilesFromDirectory(rulesDir) + const files = await readTextFilesFromDirectory(rulesDir, fileTracker) if (files.length > 0) { const content = formatDirectoryContent(rulesDir, files) rules.push(content) @@ -207,8 +265,9 @@ export async function loadRuleFiles(cwd: string): Promise { const ruleFiles = [".roorules", ".clinerules"] for (const file of ruleFiles) { - const content = await safeReadFile(path.join(cwd, file)) - if (content) { + const filePath = path.join(cwd, file) + const content = await safeReadFile(filePath) + if (content && shouldIncludeFile(filePath, content, fileTracker)) { return `\n# Rules from ${file}:\n${content}\n` } } @@ -220,7 +279,7 @@ export async function loadRuleFiles(cwd: string): Promise { * Load AGENTS.md or AGENT.md file from the project root if it exists * Checks for both AGENTS.md (standard) and AGENT.md (alternative) for compatibility */ -async function loadAgentRulesFile(cwd: string): Promise { +async function loadAgentRulesFile(cwd: string, tracker: FileTracker): Promise { // Try both filenames - AGENTS.md (standard) first, then AGENT.md (alternative) const filenames = ["AGENTS.md", "AGENT.md"] @@ -251,7 +310,7 @@ async function loadAgentRulesFile(cwd: string): Promise { // Read the content from the resolved path const content = await safeReadFile(resolvedPath) - if (content) { + if (content && shouldIncludeFile(resolvedPath, content, tracker)) { return `# Agent Rules Standard (${filename}):\n${content}` } } catch (err) { @@ -274,6 +333,12 @@ export async function addCustomInstructions( ): Promise { const sections = [] + // Create file tracker to prevent duplicates + const fileTracker: FileTracker = { + resolvedPaths: new Set(), + contentHashes: new Set(), + } + // Load mode-specific rules if mode is provided let modeRuleContent = "" let usedRuleFile = "" @@ -286,7 +351,7 @@ export async function addCustomInstructions( for (const rooDir of rooDirectories) { const modeRulesDir = path.join(rooDir, `rules-${mode}`) if (await directoryExists(modeRulesDir)) { - const files = await readTextFilesFromDirectory(modeRulesDir) + const files = await readTextFilesFromDirectory(modeRulesDir, fileTracker) if (files.length > 0) { const content = formatDirectoryContent(modeRulesDir, files) modeRules.push(content) @@ -301,13 +366,17 @@ export async function addCustomInstructions( } else { // Fall back to existing behavior for legacy files const rooModeRuleFile = `.roorules-${mode}` - modeRuleContent = await safeReadFile(path.join(cwd, rooModeRuleFile)) - if (modeRuleContent) { + const rooModeRuleFilePath = path.join(cwd, rooModeRuleFile) + const rooModeContent = await safeReadFile(rooModeRuleFilePath) + if (rooModeContent && shouldIncludeFile(rooModeRuleFilePath, rooModeContent, fileTracker)) { + modeRuleContent = rooModeContent usedRuleFile = rooModeRuleFile } else { const clineModeRuleFile = `.clinerules-${mode}` - modeRuleContent = await safeReadFile(path.join(cwd, clineModeRuleFile)) - if (modeRuleContent) { + const clineModeRuleFilePath = path.join(cwd, clineModeRuleFile) + const clineModeContent = await safeReadFile(clineModeRuleFilePath) + if (clineModeContent && shouldIncludeFile(clineModeRuleFilePath, clineModeContent, fileTracker)) { + modeRuleContent = clineModeContent usedRuleFile = clineModeRuleFile } } @@ -350,14 +419,14 @@ export async function addCustomInstructions( // Add AGENTS.md content if enabled (default: true) if (options.settings?.useAgentRules !== false) { - const agentRulesContent = await loadAgentRulesFile(cwd) + const agentRulesContent = await loadAgentRulesFile(cwd, fileTracker) if (agentRulesContent && agentRulesContent.trim()) { rules.push(agentRulesContent.trim()) } } // Add generic rules - const genericRuleContent = await loadRuleFiles(cwd) + const genericRuleContent = await loadRuleFiles(cwd, fileTracker) if (genericRuleContent && genericRuleContent.trim()) { rules.push(genericRuleContent.trim()) }