Skip to content

Commit 25fed26

Browse files
daniel-lxscte
authored andcommitted
fix: align codebase indexing with list-files hidden directory filtering (#4709)
1 parent 3d84cde commit 25fed26

File tree

7 files changed

+383
-24
lines changed

7 files changed

+383
-24
lines changed
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
// npx vitest services/code-index/processors/__tests__/file-watcher.spec.ts
2+
3+
import { vi, describe, it, expect, beforeEach } from "vitest"
4+
import { FileWatcher } from "../file-watcher"
5+
import * as vscode from "vscode"
6+
7+
// Mock dependencies
8+
vi.mock("../../cache-manager")
9+
vi.mock("../../../core/ignore/RooIgnoreController")
10+
vi.mock("ignore")
11+
12+
// Mock vscode module
13+
vi.mock("vscode", () => ({
14+
workspace: {
15+
createFileSystemWatcher: vi.fn(),
16+
workspaceFolders: [
17+
{
18+
uri: {
19+
fsPath: "/mock/workspace",
20+
},
21+
},
22+
],
23+
},
24+
RelativePattern: vi.fn().mockImplementation((base, pattern) => ({ base, pattern })),
25+
Uri: {
26+
file: vi.fn().mockImplementation((path) => ({ fsPath: path })),
27+
},
28+
EventEmitter: vi.fn().mockImplementation(() => ({
29+
event: vi.fn(),
30+
fire: vi.fn(),
31+
dispose: vi.fn(),
32+
})),
33+
ExtensionContext: vi.fn(),
34+
}))
35+
36+
describe("FileWatcher", () => {
37+
let fileWatcher: FileWatcher
38+
let mockWatcher: any
39+
let mockOnDidCreate: any
40+
let mockOnDidChange: any
41+
let mockOnDidDelete: any
42+
let mockContext: any
43+
let mockCacheManager: any
44+
let mockEmbedder: any
45+
let mockVectorStore: any
46+
let mockIgnoreInstance: any
47+
48+
beforeEach(() => {
49+
// Reset all mocks
50+
vi.clearAllMocks()
51+
52+
// Create mock event handlers
53+
mockOnDidCreate = vi.fn()
54+
mockOnDidChange = vi.fn()
55+
mockOnDidDelete = vi.fn()
56+
57+
// Create mock watcher
58+
mockWatcher = {
59+
onDidCreate: vi.fn().mockImplementation((handler) => {
60+
mockOnDidCreate = handler
61+
return { dispose: vi.fn() }
62+
}),
63+
onDidChange: vi.fn().mockImplementation((handler) => {
64+
mockOnDidChange = handler
65+
return { dispose: vi.fn() }
66+
}),
67+
onDidDelete: vi.fn().mockImplementation((handler) => {
68+
mockOnDidDelete = handler
69+
return { dispose: vi.fn() }
70+
}),
71+
dispose: vi.fn(),
72+
}
73+
74+
// Mock createFileSystemWatcher to return our mock watcher
75+
vi.mocked(vscode.workspace.createFileSystemWatcher).mockReturnValue(mockWatcher)
76+
77+
// Create mock dependencies
78+
mockContext = {
79+
subscriptions: [],
80+
}
81+
82+
mockCacheManager = {
83+
getHash: vi.fn(),
84+
updateHash: vi.fn(),
85+
deleteHash: vi.fn(),
86+
}
87+
88+
mockEmbedder = {
89+
createEmbeddings: vi.fn().mockResolvedValue({ embeddings: [[0.1, 0.2, 0.3]] }),
90+
}
91+
92+
mockVectorStore = {
93+
upsertPoints: vi.fn().mockResolvedValue(undefined),
94+
deletePointsByFilePath: vi.fn().mockResolvedValue(undefined),
95+
}
96+
97+
mockIgnoreInstance = {
98+
ignores: vi.fn().mockReturnValue(false),
99+
}
100+
101+
fileWatcher = new FileWatcher(
102+
"/mock/workspace",
103+
mockContext,
104+
mockCacheManager,
105+
mockEmbedder,
106+
mockVectorStore,
107+
mockIgnoreInstance,
108+
)
109+
})
110+
111+
describe("file filtering", () => {
112+
it("should ignore files in hidden directories on create events", async () => {
113+
// Initialize the file watcher
114+
await fileWatcher.initialize()
115+
116+
// Spy on the vector store to see which files are actually processed
117+
const processedFiles: string[] = []
118+
mockVectorStore.upsertPoints.mockImplementation(async (points: any[]) => {
119+
points.forEach((point) => {
120+
if (point.payload?.file_path) {
121+
processedFiles.push(point.payload.file_path)
122+
}
123+
})
124+
})
125+
126+
// Simulate file creation events
127+
const testCases = [
128+
{ path: "/mock/workspace/src/file.ts", shouldProcess: true },
129+
{ path: "/mock/workspace/.git/config", shouldProcess: false },
130+
{ path: "/mock/workspace/.hidden/file.ts", shouldProcess: false },
131+
{ path: "/mock/workspace/src/.next/static/file.js", shouldProcess: false },
132+
{ path: "/mock/workspace/node_modules/package/index.js", shouldProcess: false },
133+
{ path: "/mock/workspace/normal/file.js", shouldProcess: true },
134+
]
135+
136+
// Trigger file creation events
137+
for (const { path } of testCases) {
138+
await mockOnDidCreate({ fsPath: path })
139+
}
140+
141+
// Wait for batch processing
142+
await new Promise((resolve) => setTimeout(resolve, 600))
143+
144+
// Check that files in hidden directories were not processed
145+
expect(processedFiles).not.toContain("src/.next/static/file.js")
146+
expect(processedFiles).not.toContain(".git/config")
147+
expect(processedFiles).not.toContain(".hidden/file.ts")
148+
})
149+
150+
it("should ignore files in hidden directories on change events", async () => {
151+
// Initialize the file watcher
152+
await fileWatcher.initialize()
153+
154+
// Track which files are processed
155+
const processedFiles: string[] = []
156+
mockVectorStore.upsertPoints.mockImplementation(async (points: any[]) => {
157+
points.forEach((point) => {
158+
if (point.payload?.file_path) {
159+
processedFiles.push(point.payload.file_path)
160+
}
161+
})
162+
})
163+
164+
// Simulate file change events
165+
const testCases = [
166+
{ path: "/mock/workspace/src/file.ts", shouldProcess: true },
167+
{ path: "/mock/workspace/.vscode/settings.json", shouldProcess: false },
168+
{ path: "/mock/workspace/src/.cache/data.json", shouldProcess: false },
169+
{ path: "/mock/workspace/dist/bundle.js", shouldProcess: false },
170+
]
171+
172+
// Trigger file change events
173+
for (const { path } of testCases) {
174+
await mockOnDidChange({ fsPath: path })
175+
}
176+
177+
// Wait for batch processing
178+
await new Promise((resolve) => setTimeout(resolve, 600))
179+
180+
// Check that files in hidden directories were not processed
181+
expect(processedFiles).not.toContain(".vscode/settings.json")
182+
expect(processedFiles).not.toContain("src/.cache/data.json")
183+
})
184+
185+
it("should ignore files in hidden directories on delete events", async () => {
186+
// Initialize the file watcher
187+
await fileWatcher.initialize()
188+
189+
// Track which files are deleted
190+
const deletedFiles: string[] = []
191+
mockVectorStore.deletePointsByFilePath.mockImplementation(async (filePath: string) => {
192+
deletedFiles.push(filePath)
193+
})
194+
195+
// Simulate file deletion events
196+
const testCases = [
197+
{ path: "/mock/workspace/src/file.ts", shouldProcess: true },
198+
{ path: "/mock/workspace/.git/objects/abc123", shouldProcess: false },
199+
{ path: "/mock/workspace/.DS_Store", shouldProcess: false },
200+
{ path: "/mock/workspace/build/.cache/temp.js", shouldProcess: false },
201+
]
202+
203+
// Trigger file deletion events
204+
for (const { path } of testCases) {
205+
await mockOnDidDelete({ fsPath: path })
206+
}
207+
208+
// Wait for batch processing
209+
await new Promise((resolve) => setTimeout(resolve, 600))
210+
211+
// Check that files in hidden directories were not processed
212+
expect(deletedFiles).not.toContain(".git/objects/abc123")
213+
expect(deletedFiles).not.toContain(".DS_Store")
214+
expect(deletedFiles).not.toContain("build/.cache/temp.js")
215+
})
216+
217+
it("should handle nested hidden directories correctly", async () => {
218+
// Initialize the file watcher
219+
await fileWatcher.initialize()
220+
221+
// Track which files are processed
222+
const processedFiles: string[] = []
223+
mockVectorStore.upsertPoints.mockImplementation(async (points: any[]) => {
224+
points.forEach((point) => {
225+
if (point.payload?.file_path) {
226+
processedFiles.push(point.payload.file_path)
227+
}
228+
})
229+
})
230+
231+
// Test deeply nested hidden directories
232+
const testCases = [
233+
{ path: "/mock/workspace/src/components/Button.tsx", shouldProcess: true },
234+
{ path: "/mock/workspace/src/.hidden/components/Button.tsx", shouldProcess: false },
235+
{ path: "/mock/workspace/.hidden/src/components/Button.tsx", shouldProcess: false },
236+
{ path: "/mock/workspace/src/components/.hidden/Button.tsx", shouldProcess: false },
237+
]
238+
239+
// Trigger file creation events
240+
for (const { path } of testCases) {
241+
await mockOnDidCreate({ fsPath: path })
242+
}
243+
244+
// Wait for batch processing
245+
await new Promise((resolve) => setTimeout(resolve, 600))
246+
247+
// Check that files in hidden directories were not processed
248+
expect(processedFiles).not.toContain("src/.hidden/components/Button.tsx")
249+
expect(processedFiles).not.toContain(".hidden/src/components/Button.tsx")
250+
expect(processedFiles).not.toContain("src/components/.hidden/Button.tsx")
251+
})
252+
})
253+
254+
describe("dispose", () => {
255+
it("should dispose of the watcher when disposed", async () => {
256+
await fileWatcher.initialize()
257+
fileWatcher.dispose()
258+
259+
expect(mockWatcher.dispose).toHaveBeenCalled()
260+
})
261+
})
262+
})

src/services/code-index/processors/__tests__/scanner.spec.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,5 +209,38 @@ describe("DirectoryScanner", () => {
209209
expect(mockVectorStore.deletePointsByFilePath).toHaveBeenCalledWith("old/file.js")
210210
expect(mockCacheManager.deleteHash).toHaveBeenCalledWith("old/file.js")
211211
})
212+
213+
it("should filter out files in hidden directories", async () => {
214+
const { listFiles } = await import("../../../glob/list-files")
215+
// Mock listFiles to return files including some in hidden directories
216+
vi.mocked(listFiles).mockResolvedValue([
217+
[
218+
"test/file1.js",
219+
"test/.hidden/file2.js",
220+
".git/config",
221+
"src/.next/static/file3.js",
222+
"normal/file4.js",
223+
],
224+
false,
225+
])
226+
227+
// Mock parseFile to track which files are actually processed
228+
const processedFiles: string[] = []
229+
;(mockCodeParser.parseFile as any).mockImplementation((filePath: string) => {
230+
processedFiles.push(filePath)
231+
return []
232+
})
233+
234+
await scanner.scanDirectory("/test")
235+
236+
// Verify that only non-hidden files were processed
237+
expect(processedFiles).toEqual(["test/file1.js", "normal/file4.js"])
238+
expect(processedFiles).not.toContain("test/.hidden/file2.js")
239+
expect(processedFiles).not.toContain(".git/config")
240+
expect(processedFiles).not.toContain("src/.next/static/file3.js")
241+
242+
// Verify the stats
243+
expect(mockCodeParser.parseFile).toHaveBeenCalledTimes(2)
244+
})
212245
})
213246
})

src/services/code-index/processors/file-watcher.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import {
2222
import { codeParser } from "./parser"
2323
import { CacheManager } from "../cache-manager"
2424
import { generateNormalizedAbsolutePath, generateRelativeFilePath } from "../shared/get-relative-path"
25+
import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
2526

2627
/**
2728
* Implementation of the file watcher interface
@@ -453,6 +454,15 @@ export class FileWatcher implements IFileWatcher {
453454
*/
454455
async processFile(filePath: string): Promise<FileProcessingResult> {
455456
try {
457+
// Check if file is in an ignored directory
458+
if (isPathInIgnoredDirectory(filePath)) {
459+
return {
460+
path: filePath,
461+
status: "skipped" as const,
462+
reason: "File is in an ignored directory",
463+
}
464+
}
465+
456466
// Check if file should be ignored
457467
const relativeFilePath = generateRelativeFilePath(filePath)
458468
if (

src/services/code-index/processors/scanner.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import {
2222
PARSING_CONCURRENCY,
2323
BATCH_PROCESSING_CONCURRENCY,
2424
} from "../constants"
25+
import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
2526

2627
export class DirectoryScanner implements IDirectoryScanner {
2728
constructor(
@@ -61,10 +62,16 @@ export class DirectoryScanner implements IDirectoryScanner {
6162
// Filter paths using .rooignore
6263
const allowedPaths = ignoreController.filterPaths(filePaths)
6364

64-
// Filter by supported extensions and ignore patterns
65+
// Filter by supported extensions, ignore patterns, and excluded directories
6566
const supportedPaths = allowedPaths.filter((filePath) => {
6667
const ext = path.extname(filePath).toLowerCase()
6768
const relativeFilePath = generateRelativeFilePath(filePath)
69+
70+
// Check if file is in an ignored directory using the shared helper
71+
if (isPathInIgnoredDirectory(filePath)) {
72+
return false
73+
}
74+
6875
return scannerExtensions.includes(ext) && !this.ignoreInstance.ignores(relativeFilePath)
6976
})
7077

src/services/glob/constants.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* List of directories that are typically large and should be ignored
3+
* when showing recursive file listings or scanning for code indexing.
4+
* This list is shared between list-files.ts and the codebase indexing scanner
5+
* to ensure consistent behavior across the application.
6+
*/
7+
export const DIRS_TO_IGNORE = [
8+
"node_modules",
9+
"__pycache__",
10+
"env",
11+
"venv",
12+
"target/dependency",
13+
"build/dependencies",
14+
"dist",
15+
"out",
16+
"bundle",
17+
"vendor",
18+
"tmp",
19+
"temp",
20+
"deps",
21+
"pkg",
22+
"Pods",
23+
".*",
24+
]

0 commit comments

Comments
 (0)