Skip to content

Commit 70f5901

Browse files
committed
feat: implement unified .gitignore/.rooignore handling for consistent file indexing
- Create UnifiedIgnoreController that combines .gitignore and .rooignore processing
- Implement fallback behavior: .gitignore used when .rooignore is missing or empty
- Update CodeIndexManager to use unified ignore patterns instead of separate systems
- Update DirectoryScanner to use UnifiedIgnoreController for consistent filtering
- Update list-files service to support .rooignore patterns via unified controller
- Add comprehensive test suite with 23 test cases covering all functionality
- Fix VSCode mocks in test files to include missing RelativePattern and file watcher APIs

Fixes #5655: Resolves inconsistent .gitignore/.rooignore handling in codebase indexing
1 parent e84dd0a commit 70f5901

File tree

9 files changed

+767
-125
lines changed

9 files changed

+767
-125
lines changed
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
import path from "path"
2+
import { fileExistsAtPath } from "../../utils/fs"
3+
import fs from "fs/promises"
4+
import ignore, { Ignore } from "ignore"
5+
import * as vscode from "vscode"
6+
7+
// Lock emoji (U+1F512). The text built by UnifiedIgnoreController.getInstructions()
// refers to it as the marker shown next to blocked files in list_files output.
export const LOCK_TEXT_SYMBOL = "\u{1F512}"
8+
9+
/**
 * Unified controller that handles both .gitignore and .rooignore patterns
 * with fallback behavior:
 *
 * - a .rooignore in `cwd` with non-blank content is used exclusively;
 * - when .rooignore is missing, unreadable, or blank, every .gitignore found in
 *   `cwd` and its ancestor directories is used instead.
 *
 * NOTE(review): all patterns are added to a single matcher and tested against
 * paths relative to `cwd`. Patterns that come from an ancestor .gitignore are
 * therefore NOT re-based onto the directory that contains that file, which
 * differs from git's own semantics for anchored patterns (e.g. `/build`).
 */
export class UnifiedIgnoreController {
	/** Base commands (lower-cased) that validateCommand treats as reading file contents. */
	private static readonly fileReadingCommands: ReadonlySet<string> = new Set([
		// Unix commands
		"cat",
		"less",
		"more",
		"head",
		"tail",
		"grep",
		"awk",
		"sed",
		// PowerShell commands and aliases
		"get-content",
		"gc",
		"type",
		"select-string",
		"sls",
	])

	private cwd: string
	private ignoreInstance: Ignore
	private disposables: vscode.Disposable[] = []
	private rooIgnoreContent: string | undefined
	private gitIgnoreContent: string | undefined
	private hasRooIgnore: boolean = false

	/**
	 * Creates the controller with an empty matcher and registers file watchers.
	 * No patterns are loaded until initialize() is awaited (or a watcher fires).
	 * @param cwd - Directory that contains the ignore files and that relative paths are resolved against
	 */
	constructor(cwd: string) {
		this.cwd = cwd
		this.ignoreInstance = ignore()
		this.rooIgnoreContent = undefined
		this.gitIgnoreContent = undefined
		this.setupFileWatchers()
	}

	/**
	 * Initialize the controller by loading the ignore patterns from disk.
	 * Must be called after construction and before using the controller.
	 */
	async initialize(): Promise<void> {
		await this.loadIgnorePatterns()
	}

	/**
	 * Watch `cwd`/.rooignore and `cwd`/.gitignore and reload all patterns whenever
	 * either file is changed, created, or deleted. Watchers and their event
	 * subscriptions are tracked in `disposables` so dispose() can release them.
	 */
	private setupFileWatchers(): void {
		// Returns the promise so a caller that invokes the handler directly can await the reload.
		// loadIgnorePatterns() catches its own errors, so the promise does not reject.
		const reload = (): Promise<void> => this.loadIgnorePatterns()

		for (const fileName of [".rooignore", ".gitignore"]) {
			const watcher = vscode.workspace.createFileSystemWatcher(new vscode.RelativePattern(this.cwd, fileName))
			this.disposables.push(
				watcher.onDidChange(reload),
				watcher.onDidCreate(reload),
				watcher.onDidDelete(reload),
				watcher,
			)
		}
	}

	/**
	 * Load patterns from .rooignore, falling back to .gitignore.
	 *
	 * The new matcher and the associated state are assembled in locals and only
	 * published once loading has finished. Callers of validateAccess() therefore
	 * never observe a half-built (empty) matcher while a reload is in flight, and
	 * an unexpected failure leaves the previously loaded patterns in force.
	 * Every reload recomputes `gitIgnoreContent` from scratch so that a deleted or
	 * emptied .gitignore does not leave stale text behind.
	 */
	private async loadIgnorePatterns(): Promise<void> {
		try {
			const nextIgnore = ignore()
			const rooIgnorePath = path.join(this.cwd, ".rooignore")

			// .rooignore has the higher priority, so look at it first.
			let rooExists = await fileExistsAtPath(rooIgnorePath)
			let rooContent: string | undefined

			if (rooExists) {
				try {
					rooContent = await fs.readFile(rooIgnorePath, "utf8")
				} catch (error) {
					// An unreadable .rooignore is treated like a missing one.
					console.error("Error reading .rooignore:", error)
					rooContent = undefined
					rooExists = false
				}
			}

			let gitContent: string | undefined

			if (rooContent !== undefined && rooContent.trim().length > 0) {
				// Use .rooignore exclusively when it exists and has content;
				// the .rooignore file itself is always ignored as well.
				nextIgnore.add(rooContent)
				nextIgnore.add(".rooignore")
			} else {
				// Fallback to .gitignore when .rooignore is missing or empty
				gitContent = await this.loadGitIgnorePatterns(nextIgnore)
			}

			// Publish the new state in one synchronous step.
			this.ignoreInstance = nextIgnore
			this.hasRooIgnore = rooExists
			this.rooIgnoreContent = rooContent
			this.gitIgnoreContent = gitContent
		} catch (error) {
			console.error("Unexpected error loading ignore patterns:", error)
		}
	}

	/**
	 * Add the patterns of every non-blank .gitignore found in `cwd` and its
	 * ancestor directories to `target` (outermost file first, `cwd` last).
	 * When at least one file contributed patterns, ".gitignore" itself is ignored too.
	 *
	 * @param target - Matcher that receives the patterns
	 * @returns The text to report as the active .gitignore content: the content of
	 *   the .gitignore located directly in `cwd` when it has content, otherwise the
	 *   trimmed concatenation of all ancestor files; undefined when nothing was loaded
	 */
	private async loadGitIgnorePatterns(target: Ignore): Promise<string | undefined> {
		let cwdContent: string | undefined
		let combinedContent = ""
		let hasGitIgnoreContent = false

		try {
			const gitignoreFiles = await this.findGitignoreFiles(this.cwd)
			// Compare against the normalized directory so a `cwd` with a trailing
			// separator or redundant segments still matches its own .gitignore.
			const cwdGitignoreDir = path.dirname(path.join(this.cwd, ".gitignore"))

			for (const gitignoreFile of gitignoreFiles) {
				try {
					const content = await fs.readFile(gitignoreFile, "utf8")
					if (content.trim().length === 0) {
						continue
					}

					target.add(content)
					hasGitIgnoreContent = true
					combinedContent += content + "\n"

					if (path.dirname(gitignoreFile) === cwdGitignoreDir) {
						cwdContent = content
					}
				} catch (err) {
					// One unreadable file must not prevent the others from loading.
					console.warn(`Error reading .gitignore at ${gitignoreFile}: ${err}`)
				}
			}

			// Always ignore .gitignore files themselves
			if (hasGitIgnoreContent) {
				target.add(".gitignore")
			}
		} catch (error) {
			console.error("Error loading .gitignore patterns:", error)
		}

		if (!hasGitIgnoreContent) {
			return undefined
		}
		return cwdContent ?? combinedContent.trim()
	}

	/**
	 * Collect the .gitignore files that exist in `startPath` and in each of its
	 * ancestor directories. The walk stops before the top-most directory (the one
	 * whose parent is itself), so a .gitignore located there is not considered.
	 *
	 * @param startPath - Directory to start from
	 * @returns Paths ordered from the outermost directory down to `startPath`
	 */
	private async findGitignoreFiles(startPath: string): Promise<string[]> {
		const found: string[] = []

		for (
			let currentPath = startPath;
			currentPath && currentPath !== path.dirname(currentPath);
			currentPath = path.dirname(currentPath)
		) {
			const candidate = path.join(currentPath, ".gitignore")
			try {
				await fs.access(candidate)
				found.push(candidate)
			} catch {
				// .gitignore doesn't exist at this level, continue
			}
		}

		// Collected innermost-first; callers want outermost-first.
		return found.reverse()
	}

	/**
	 * Check if a file should be accessible to the LLM
	 * @param filePath - Path to check (absolute, or relative to cwd)
	 * @returns true if file is accessible, false if ignored
	 */
	validateAccess(filePath: string): boolean {
		try {
			// Normalize path to be relative to cwd and use forward slashes
			const absolutePath = path.resolve(this.cwd, filePath)
			const relativePath = path.relative(this.cwd, absolutePath).replace(/\\/g, "/")

			return !this.ignoreInstance.ignores(relativePath)
		} catch {
			// Ignore is designed to work with relative file paths, so will throw error for paths outside cwd.
			// We are allowing access to all files outside cwd.
			return true
		}
	}

	/**
	 * Check if a terminal command should be allowed to execute based on file access patterns.
	 * Only the first whitespace-separated word is inspected as the command; quoting,
	 * pipes, and chained commands are not parsed.
	 *
	 * NOTE(review): arguments starting with "/" or containing ":" are skipped as
	 * flags/parameters, so absolute POSIX paths and drive-letter paths are never validated here.
	 *
	 * @param command - Terminal command to validate
	 * @returns path of file that is being accessed if it is being accessed, undefined if command is allowed
	 */
	validateCommand(command: string): string | undefined {
		// Always allow if no ignore patterns are loaded
		if (!this.rooIgnoreContent && !this.gitIgnoreContent) {
			return undefined
		}

		const [first = "", ...args] = command.trim().split(/\s+/)
		if (!UnifiedIgnoreController.fileReadingCommands.has(first.toLowerCase())) {
			return undefined
		}

		for (const arg of args) {
			// Skip command flags/options (both Unix and PowerShell style)
			if (arg.startsWith("-") || arg.startsWith("/")) {
				continue
			}
			// Ignore PowerShell parameter names
			if (arg.includes(":")) {
				continue
			}
			if (!this.validateAccess(arg)) {
				return arg
			}
		}

		return undefined
	}

	/**
	 * Filter an array of paths, removing those that should be ignored
	 * @param paths - Array of paths to filter (relative to cwd)
	 * @returns Array of allowed paths, in their original order
	 */
	filterPaths(paths: string[]): string[] {
		try {
			return paths.filter((p) => this.validateAccess(p))
		} catch (error) {
			console.error("Error filtering paths:", error)
			return [] // Fail closed for security
		}
	}

	/**
	 * Get the current ignore instance for external use.
	 * A reload replaces the instance, so a reference obtained earlier does not
	 * reflect patterns loaded afterwards.
	 * @returns The ignore instance containing the current patterns
	 */
	getIgnoreInstance(): Ignore {
		return this.ignoreInstance
	}

	/**
	 * Check if .rooignore file exists and has content
	 * @returns true if .rooignore is being used, false if falling back to .gitignore
	 */
	isUsingRooIgnore(): boolean {
		return this.hasRooIgnore && !!this.rooIgnoreContent?.trim()
	}

	/**
	 * Get formatted instructions about the ignore files for the LLM
	 * @returns Formatted instructions or undefined if no ignore patterns are loaded
	 */
	getInstructions(): string | undefined {
		if (this.isUsingRooIgnore()) {
			return `# .rooignore\n\n(The following is provided by a root-level .rooignore file where the user has specified files and directories that should not be accessed. When using list_files, you'll notice a ${LOCK_TEXT_SYMBOL} next to files that are blocked. Attempting to access the file's contents e.g. through read_file will result in an error.)\n\n${this.rooIgnoreContent}\n.rooignore`
		}

		if (this.gitIgnoreContent) {
			return `# .gitignore (fallback)\n\n(The following patterns are being used from .gitignore since no .rooignore file was found. Files matching these patterns will be excluded from indexing and file operations. When using list_files, you'll notice a ${LOCK_TEXT_SYMBOL} next to files that are blocked.)\n\n${this.gitIgnoreContent}`
		}

		return undefined
	}

	/**
	 * Clean up resources when the controller is no longer needed
	 */
	dispose(): void {
		this.disposables.forEach((d) => d.dispose())
		this.disposables = []
	}
}

0 commit comments

Comments
 (0)