Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions src/core/prompts/sections/__tests__/custom-instructions.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1605,3 +1605,120 @@ describe("Rules directory reading", () => {
expect(result).toBe("\n# Rules from .roorules:\nfallback content\n")
})
})

describe("File deduplication", () => {
beforeEach(() => {
vi.clearAllMocks()
})

test("should skip files with duplicate content (same content hash)", async () => {
// Mock directory structure with different files having identical content
const mockEntries = [
{
name: "file1.md",
isFile: () => true,
isSymbolicLink: () => false,
isDirectory: () => false,
parentPath: "/fake/path/.roo/rules",
},
{
name: "file2.md",
isFile: () => true,
isSymbolicLink: () => false,
isDirectory: () => false,
parentPath: "/fake/path/.roo/rules",
},
{
name: "file3.md",
isFile: () => true,
isSymbolicLink: () => false,
isDirectory: () => false,
parentPath: "/fake/path/.roo/rules",
},
]

// Mock fs operations
statMock.mockResolvedValueOnce({ isDirectory: () => true }) // .roo/rules exists
readdirMock.mockResolvedValueOnce(mockEntries)

// Mock file stats for each file
statMock.mockResolvedValueOnce({ isFile: () => true }) // file1.md
statMock.mockResolvedValueOnce({ isFile: () => true }) // file2.md
statMock.mockResolvedValueOnce({ isFile: () => true }) // file3.md

// Mock file content reading - file2 and file3 have identical content
readFileMock.mockResolvedValueOnce("unique content") // file1.md
readFileMock.mockResolvedValueOnce("duplicate content") // file2.md
readFileMock.mockResolvedValueOnce("duplicate content") // file3.md (should be skipped due to duplicate hash)

const result = await loadRuleFiles("/fake/path")

// Should include both unique contents, but duplicate content only once
expect(result).toContain("unique content")
expect(result).toContain("duplicate content")

// Count occurrences of "duplicate content" - should only appear once
const duplicateContentMatches = (result.match(/duplicate content/g) || []).length
expect(duplicateContentMatches).toBe(1)
})

test("should skip files with duplicate resolved paths via symlinks", async () => {
// Mock directory structure with symlinks pointing to the same file
const mockEntries = [
{
name: "original.md",
isFile: () => true,
isSymbolicLink: () => false,
isDirectory: () => false,
parentPath: "/fake/path/.roo/rules",
},
{
name: "symlink1.md",
isFile: () => false,
isSymbolicLink: () => true,
isDirectory: () => false,
parentPath: "/fake/path/.roo/rules",
},
{
name: "symlink2.md",
isFile: () => false,
isSymbolicLink: () => true,
isDirectory: () => false,
parentPath: "/fake/path/.roo/rules",
},
]

// Mock fs operations
statMock.mockResolvedValueOnce({ isDirectory: () => true }) // .roo/rules exists
readdirMock.mockResolvedValueOnce(mockEntries)

// Mock file stats
statMock.mockResolvedValueOnce({ isFile: () => true }) // original.md

// Mock symlink resolution - both symlinks point to original.md
readlinkMock.mockResolvedValueOnce("original.md") // symlink1 -> original.md
statMock.mockResolvedValueOnce({ isFile: () => true }) // symlink1 target

readlinkMock.mockResolvedValueOnce("original.md") // symlink2 -> original.md
statMock.mockResolvedValueOnce({ isFile: () => true }) // symlink2 target

// Mock file content reading
readFileMock.mockResolvedValueOnce("original content") // original.md
readFileMock.mockResolvedValueOnce("original content") // symlink1 target (should be skipped - same resolved path)
readFileMock.mockResolvedValueOnce("original content") // symlink2 target (should be skipped - same resolved path)

const result = await loadRuleFiles("/fake/path")

// Should only include original content once
expect(result).toContain("original content")

// Count occurrences of "original content" - should only appear once
const originalContentMatches = (result.match(/original content/g) || []).length
expect(originalContentMatches).toBe(1)
})

// Note: The deduplication functionality is tested by the two tests above.
// Additional integration testing would require more complex mock setup
// that may be brittle. The core functionality is verified by the
// content hash and resolved path deduplication tests.
})
97 changes: 83 additions & 14 deletions src/core/prompts/sections/custom-instructions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,49 @@ import type { SystemPromptSettings } from "../types"
import { LANGUAGES } from "../../../shared/language"
import { getRooDirectoriesForCwd, getGlobalRooDirectory } from "../../../services/roo-config"

/**
* File tracking system to prevent duplicate inclusions
*/
interface FileTracker {
resolvedPaths: Set<string>
contentHashes: Set<string>
}

/**
* Create content hash using FNV-1a algorithm (non-cryptographic, fast, cross-platform)
*/
function createContentHash(content: string): string {
let hash = 2166136261 // FNV offset basis (32-bit)

for (let i = 0; i < content.length; i++) {
hash ^= content.charCodeAt(i)
hash = Math.imul(hash, 16777619) // FNV prime (32-bit)
}

return (hash >>> 0).toString(16) // Convert to unsigned 32-bit hex
}

/**
* Check if a file should be included based on resolved path and content hash
*/
function shouldIncludeFile(resolvedPath: string, content: string, tracker: FileTracker): boolean {
// Skip if we've already included this resolved path
if (tracker.resolvedPaths.has(resolvedPath)) {
return false
}

// Skip if we've already included content with this hash
const contentHash = createContentHash(content)
if (tracker.contentHashes.has(contentHash)) {
return false
}

// Add to tracking sets
tracker.resolvedPaths.add(resolvedPath)
tracker.contentHashes.add(contentHash)
return true
}

/**
* Safely read a file and return its trimmed content
*/
Expand Down Expand Up @@ -108,7 +151,10 @@ async function resolveSymLink(
/**
* Read all text files from a directory in alphabetical order
*/
async function readTextFilesFromDirectory(dirPath: string): Promise<Array<{ filename: string; content: string }>> {
async function readTextFilesFromDirectory(
dirPath: string,
tracker: FileTracker,
): Promise<Array<{ filename: string; content: string }>> {
try {
const entries = await fs.readdir(dirPath, { withFileTypes: true, recursive: true })

Expand Down Expand Up @@ -136,6 +182,12 @@ async function readTextFilesFromDirectory(dirPath: string): Promise<Array<{ file
return null
}
const content = await safeReadFile(resolvedPath)

// Skip if file was already included or content is duplicate
if (!shouldIncludeFile(resolvedPath, content, tracker)) {
return null
}

// Use resolvedPath for display to maintain existing behavior
return { filename: resolvedPath, content, sortKey: originalPath }
}
Expand Down Expand Up @@ -182,15 +234,21 @@ function formatDirectoryContent(dirPath: string, files: Array<{ filename: string
* Load rule files from global and project-local directories
* Global rules are loaded first, then project-local rules which can override global ones
*/
export async function loadRuleFiles(cwd: string): Promise<string> {
export async function loadRuleFiles(cwd: string, tracker?: FileTracker): Promise<string> {
// Create a default tracker if none provided (for backward compatibility with tests)
const fileTracker = tracker || {
resolvedPaths: new Set<string>(),
contentHashes: new Set<string>(),
}

const rules: string[] = []
const rooDirectories = getRooDirectoriesForCwd(cwd)

// Check for .roo/rules/ directories in order (global first, then project-local)
for (const rooDir of rooDirectories) {
const rulesDir = path.join(rooDir, "rules")
if (await directoryExists(rulesDir)) {
const files = await readTextFilesFromDirectory(rulesDir)
const files = await readTextFilesFromDirectory(rulesDir, fileTracker)
if (files.length > 0) {
const content = formatDirectoryContent(rulesDir, files)
rules.push(content)
Expand All @@ -207,8 +265,9 @@ export async function loadRuleFiles(cwd: string): Promise<string> {
const ruleFiles = [".roorules", ".clinerules"]

for (const file of ruleFiles) {
const content = await safeReadFile(path.join(cwd, file))
if (content) {
const filePath = path.join(cwd, file)
const content = await safeReadFile(filePath)
if (content && shouldIncludeFile(filePath, content, fileTracker)) {
return `\n# Rules from ${file}:\n${content}\n`
}
}
Expand All @@ -220,7 +279,7 @@ export async function loadRuleFiles(cwd: string): Promise<string> {
* Load AGENTS.md or AGENT.md file from the project root if it exists
* Checks for both AGENTS.md (standard) and AGENT.md (alternative) for compatibility
*/
async function loadAgentRulesFile(cwd: string): Promise<string> {
async function loadAgentRulesFile(cwd: string, tracker: FileTracker): Promise<string> {
// Try both filenames - AGENTS.md (standard) first, then AGENT.md (alternative)
const filenames = ["AGENTS.md", "AGENT.md"]

Expand Down Expand Up @@ -251,7 +310,7 @@ async function loadAgentRulesFile(cwd: string): Promise<string> {

// Read the content from the resolved path
const content = await safeReadFile(resolvedPath)
if (content) {
if (content && shouldIncludeFile(resolvedPath, content, tracker)) {
return `# Agent Rules Standard (${filename}):\n${content}`
}
} catch (err) {
Expand All @@ -274,6 +333,12 @@ export async function addCustomInstructions(
): Promise<string> {
const sections = []

// Create file tracker to prevent duplicates
const fileTracker: FileTracker = {
resolvedPaths: new Set<string>(),
contentHashes: new Set<string>(),
}

// Load mode-specific rules if mode is provided
let modeRuleContent = ""
let usedRuleFile = ""
Expand All @@ -286,7 +351,7 @@ export async function addCustomInstructions(
for (const rooDir of rooDirectories) {
const modeRulesDir = path.join(rooDir, `rules-${mode}`)
if (await directoryExists(modeRulesDir)) {
const files = await readTextFilesFromDirectory(modeRulesDir)
const files = await readTextFilesFromDirectory(modeRulesDir, fileTracker)
if (files.length > 0) {
const content = formatDirectoryContent(modeRulesDir, files)
modeRules.push(content)
Expand All @@ -301,13 +366,17 @@ export async function addCustomInstructions(
} else {
// Fall back to existing behavior for legacy files
const rooModeRuleFile = `.roorules-${mode}`
modeRuleContent = await safeReadFile(path.join(cwd, rooModeRuleFile))
if (modeRuleContent) {
const rooModeRuleFilePath = path.join(cwd, rooModeRuleFile)
const rooModeContent = await safeReadFile(rooModeRuleFilePath)
if (rooModeContent && shouldIncludeFile(rooModeRuleFilePath, rooModeContent, fileTracker)) {
modeRuleContent = rooModeContent
usedRuleFile = rooModeRuleFile
} else {
const clineModeRuleFile = `.clinerules-${mode}`
modeRuleContent = await safeReadFile(path.join(cwd, clineModeRuleFile))
if (modeRuleContent) {
const clineModeRuleFilePath = path.join(cwd, clineModeRuleFile)
const clineModeContent = await safeReadFile(clineModeRuleFilePath)
if (clineModeContent && shouldIncludeFile(clineModeRuleFilePath, clineModeContent, fileTracker)) {
modeRuleContent = clineModeContent
usedRuleFile = clineModeRuleFile
}
}
Expand Down Expand Up @@ -350,14 +419,14 @@ export async function addCustomInstructions(

// Add AGENTS.md content if enabled (default: true)
if (options.settings?.useAgentRules !== false) {
const agentRulesContent = await loadAgentRulesFile(cwd)
const agentRulesContent = await loadAgentRulesFile(cwd, fileTracker)
if (agentRulesContent && agentRulesContent.trim()) {
rules.push(agentRulesContent.trim())
}
}

// Add generic rules
const genericRuleContent = await loadRuleFiles(cwd)
const genericRuleContent = await loadRuleFiles(cwd, fileTracker)
if (genericRuleContent && genericRuleContent.trim()) {
rules.push(genericRuleContent.trim())
}
Expand Down
Loading