// npx vitest services/code-index/processors/__tests__/file-watcher.spec.ts

import { vi, describe, it, expect, beforeEach } from "vitest"
import { FileWatcher } from "../file-watcher"
import * as vscode from "vscode"

// Mock dependencies
vi.mock("../../cache-manager")
vi.mock("../../../core/ignore/RooIgnoreController")
vi.mock("ignore")

// Mock vscode module: only the members this spec touches are provided.
vi.mock("vscode", () => ({
	workspace: {
		// Return value is configured per test in beforeEach (see mockWatcher).
		createFileSystemWatcher: vi.fn(),
		workspaceFolders: [
			{
				uri: {
					fsPath: "/mock/workspace",
				},
			},
		],
	},
	RelativePattern: vi.fn().mockImplementation((base, pattern) => ({ base, pattern })),
	Uri: {
		file: vi.fn().mockImplementation((path) => ({ fsPath: path })),
	},
	EventEmitter: vi.fn().mockImplementation(() => ({
		event: vi.fn(),
		fire: vi.fn(),
		dispose: vi.fn(),
	})),
	ExtensionContext: vi.fn(),
}))

/**
 * Spec for FileWatcher: feeds simulated create/change/delete events through a
 * mocked vscode FileSystemWatcher and inspects what reaches the vector store.
 *
 * NOTE(review): every filtering assertion below is negative (`not.toContain`).
 * If nothing at all reaches the vector store (for example because the mock
 * paths cannot be read from disk), these tests still pass. The `shouldProcess`
 * flag on each test case is never read, so the positive cases are not verified.
 */
describe("FileWatcher", () => {
	let fileWatcher: FileWatcher
	let mockWatcher: any
	// These start as plain mocks and are overwritten with the real handlers that
	// FileWatcher registers on the mocked watcher (see mockWatcher below).
	let mockOnDidCreate: any
	let mockOnDidChange: any
	let mockOnDidDelete: any
	let mockContext: any
	let mockCacheManager: any
	let mockEmbedder: any
	let mockVectorStore: any
	let mockIgnoreInstance: any

	beforeEach(() => {
		// Reset all mocks
		vi.clearAllMocks()

		// Create mock event handlers
		mockOnDidCreate = vi.fn()
		mockOnDidChange = vi.fn()
		mockOnDidDelete = vi.fn()

		// Create mock watcher. Each onDid* registration captures the handler it is
		// given so that tests can invoke it directly to simulate a file event.
		mockWatcher = {
			onDidCreate: vi.fn().mockImplementation((handler) => {
				mockOnDidCreate = handler
				return { dispose: vi.fn() }
			}),
			onDidChange: vi.fn().mockImplementation((handler) => {
				mockOnDidChange = handler
				return { dispose: vi.fn() }
			}),
			onDidDelete: vi.fn().mockImplementation((handler) => {
				mockOnDidDelete = handler
				return { dispose: vi.fn() }
			}),
			dispose: vi.fn(),
		}

		// Mock createFileSystemWatcher to return our mock watcher
		vi.mocked(vscode.workspace.createFileSystemWatcher).mockReturnValue(mockWatcher)

		// Create mock dependencies
		mockContext = {
			subscriptions: [],
		}

		mockCacheManager = {
			getHash: vi.fn(),
			updateHash: vi.fn(),
			deleteHash: vi.fn(),
		}

		mockEmbedder = {
			createEmbeddings: vi.fn().mockResolvedValue({ embeddings: [[0.1, 0.2, 0.3]] }),
		}

		mockVectorStore = {
			upsertPoints: vi.fn().mockResolvedValue(undefined),
			deletePointsByFilePath: vi.fn().mockResolvedValue(undefined),
		}

		// The ignore instance never ignores anything, so any filtering observed in
		// these tests has to come from somewhere other than this mock.
		mockIgnoreInstance = {
			ignores: vi.fn().mockReturnValue(false),
		}

		fileWatcher = new FileWatcher(
			"/mock/workspace",
			mockContext,
			mockCacheManager,
			mockEmbedder,
			mockVectorStore,
			mockIgnoreInstance,
		)
	})

	describe("file filtering", () => {
		it("should ignore files in hidden directories on create events", async () => {
			// Initialize the file watcher
			// (presumably this is where the onDid* handlers get registered — confirm in file-watcher.ts)
			await fileWatcher.initialize()

			// Spy on the vector store to see which files are actually processed
			const processedFiles: string[] = []
			mockVectorStore.upsertPoints.mockImplementation(async (points: any[]) => {
				points.forEach((point) => {
					if (point.payload?.file_path) {
						processedFiles.push(point.payload.file_path)
					}
				})
			})

			// Simulate file creation events
			// NOTE(review): `shouldProcess` is descriptive only; it is never read below.
			const testCases = [
				{ path: "/mock/workspace/src/file.ts", shouldProcess: true },
				{ path: "/mock/workspace/.git/config", shouldProcess: false },
				{ path: "/mock/workspace/.hidden/file.ts", shouldProcess: false },
				{ path: "/mock/workspace/src/.next/static/file.js", shouldProcess: false },
				{ path: "/mock/workspace/node_modules/package/index.js", shouldProcess: false },
				{ path: "/mock/workspace/normal/file.js", shouldProcess: true },
			]

			// Trigger file creation events
			for (const { path } of testCases) {
				await mockOnDidCreate({ fsPath: path })
			}

			// Wait for batch processing
			// NOTE(review): real 600 ms sleep; presumably longer than the watcher's
			// batching delay — confirm against file-watcher.ts.
			await new Promise((resolve) => setTimeout(resolve, 600))

			// Check that files in hidden directories were not processed
			// NOTE(review): assumes payload.file_path is workspace-relative — confirm.
			// The node_modules case above has no matching assertion.
			expect(processedFiles).not.toContain("src/.next/static/file.js")
			expect(processedFiles).not.toContain(".git/config")
			expect(processedFiles).not.toContain(".hidden/file.ts")
		})

		it("should ignore files in hidden directories on change events", async () => {
			// Initialize the file watcher
			await fileWatcher.initialize()

			// Track which files are processed
			const processedFiles: string[] = []
			mockVectorStore.upsertPoints.mockImplementation(async (points: any[]) => {
				points.forEach((point) => {
					if (point.payload?.file_path) {
						processedFiles.push(point.payload.file_path)
					}
				})
			})

			// Simulate file change events
			const testCases = [
				{ path: "/mock/workspace/src/file.ts", shouldProcess: true },
				{ path: "/mock/workspace/.vscode/settings.json", shouldProcess: false },
				{ path: "/mock/workspace/src/.cache/data.json", shouldProcess: false },
				{ path: "/mock/workspace/dist/bundle.js", shouldProcess: false },
			]

			// Trigger file change events
			for (const { path } of testCases) {
				await mockOnDidChange({ fsPath: path })
			}

			// Wait for batch processing
			await new Promise((resolve) => setTimeout(resolve, 600))

			// Check that files in hidden directories were not processed
			// NOTE(review): the dist/bundle.js case above has no matching assertion.
			expect(processedFiles).not.toContain(".vscode/settings.json")
			expect(processedFiles).not.toContain("src/.cache/data.json")
		})

		it("should ignore files in hidden directories on delete events", async () => {
			// Initialize the file watcher
			await fileWatcher.initialize()

			// Track which files are deleted
			const deletedFiles: string[] = []
			mockVectorStore.deletePointsByFilePath.mockImplementation(async (filePath: string) => {
				deletedFiles.push(filePath)
			})

			// Simulate file deletion events
			const testCases = [
				{ path: "/mock/workspace/src/file.ts", shouldProcess: true },
				{ path: "/mock/workspace/.git/objects/abc123", shouldProcess: false },
				{ path: "/mock/workspace/.DS_Store", shouldProcess: false },
				{ path: "/mock/workspace/build/.cache/temp.js", shouldProcess: false },
			]

			// Trigger file deletion events
			for (const { path } of testCases) {
				await mockOnDidDelete({ fsPath: path })
			}

			// Wait for batch processing
			await new Promise((resolve) => setTimeout(resolve, 600))

			// Check that files in hidden directories were not processed
			// NOTE(review): assumes deletePointsByFilePath receives workspace-relative
			// paths — confirm; an absolute path would make these checks pass trivially.
			expect(deletedFiles).not.toContain(".git/objects/abc123")
			expect(deletedFiles).not.toContain(".DS_Store")
			expect(deletedFiles).not.toContain("build/.cache/temp.js")
		})

		it("should handle nested hidden directories correctly", async () => {
			// Initialize the file watcher
			await fileWatcher.initialize()

			// Track which files are processed
			const processedFiles: string[] = []
			mockVectorStore.upsertPoints.mockImplementation(async (points: any[]) => {
				points.forEach((point) => {
					if (point.payload?.file_path) {
						processedFiles.push(point.payload.file_path)
					}
				})
			})

			// Test deeply nested hidden directories: the hidden segment appears in the
			// middle, at the start, and directly above the file.
			const testCases = [
				{ path: "/mock/workspace/src/components/Button.tsx", shouldProcess: true },
				{ path: "/mock/workspace/src/.hidden/components/Button.tsx", shouldProcess: false },
				{ path: "/mock/workspace/.hidden/src/components/Button.tsx", shouldProcess: false },
				{ path: "/mock/workspace/src/components/.hidden/Button.tsx", shouldProcess: false },
			]

			// Trigger file creation events
			for (const { path } of testCases) {
				await mockOnDidCreate({ fsPath: path })
			}

			// Wait for batch processing
			await new Promise((resolve) => setTimeout(resolve, 600))

			// Check that files in hidden directories were not processed
			expect(processedFiles).not.toContain("src/.hidden/components/Button.tsx")
			expect(processedFiles).not.toContain(".hidden/src/components/Button.tsx")
			expect(processedFiles).not.toContain("src/components/.hidden/Button.tsx")
		})
	})

	describe("dispose", () => {
		it("should dispose of the watcher when disposed", async () => {
			await fileWatcher.initialize()
			fileWatcher.dispose()

			// The underlying (mocked) vscode watcher must be released as well.
			expect(mockWatcher.dispose).toHaveBeenCalled()
		})
	})
})
a/src/services/code-index/processors/file-watcher.ts +++ b/src/services/code-index/processors/file-watcher.ts @@ -22,6 +22,7 @@ import { import { codeParser } from "./parser" import { CacheManager } from "../cache-manager" import { generateNormalizedAbsolutePath, generateRelativeFilePath } from "../shared/get-relative-path" +import { isPathInIgnoredDirectory } from "../../glob/ignore-utils" /** * Implementation of the file watcher interface @@ -453,6 +454,15 @@ export class FileWatcher implements IFileWatcher { */ async processFile(filePath: string): Promise { try { + // Check if file is in an ignored directory + if (isPathInIgnoredDirectory(filePath)) { + return { + path: filePath, + status: "skipped" as const, + reason: "File is in an ignored directory", + } + } + // Check if file should be ignored const relativeFilePath = generateRelativeFilePath(filePath) if ( diff --git a/src/services/code-index/processors/scanner.ts b/src/services/code-index/processors/scanner.ts index f0dafb60c3..24d3e7dbba 100644 --- a/src/services/code-index/processors/scanner.ts +++ b/src/services/code-index/processors/scanner.ts @@ -22,6 +22,7 @@ import { PARSING_CONCURRENCY, BATCH_PROCESSING_CONCURRENCY, } from "../constants" +import { isPathInIgnoredDirectory } from "../../glob/ignore-utils" export class DirectoryScanner implements IDirectoryScanner { constructor( @@ -61,10 +62,16 @@ export class DirectoryScanner implements IDirectoryScanner { // Filter paths using .rooignore const allowedPaths = ignoreController.filterPaths(filePaths) - // Filter by supported extensions and ignore patterns + // Filter by supported extensions, ignore patterns, and excluded directories const supportedPaths = allowedPaths.filter((filePath) => { const ext = path.extname(filePath).toLowerCase() const relativeFilePath = generateRelativeFilePath(filePath) + + // Check if file is in an ignored directory using the shared helper + if (isPathInIgnoredDirectory(filePath)) { + return false + } + return 
/**
 * List of directories that are typically large and should be ignored
 * when showing recursive file listings or scanning for code indexing.
 * This list is shared between list-files.ts and the codebase indexing scanner
 * to ensure consistent behavior across the application.
 *
 * Entry forms used below:
 * - a single directory name (e.g. "node_modules"),
 * - a multi-segment path (e.g. "target/dependency"), written with "/" separators,
 * - the special entry ".*", which stands for hidden (dot-prefixed) directories.
 */
export const DIRS_TO_IGNORE = [
	"node_modules",
	"__pycache__",
	"env",
	"venv",
	// Multi-segment entries: only the nested directory is listed, not its parent.
	"target/dependency",
	"build/dependencies",
	"dist",
	"out",
	"bundle",
	"vendor",
	"tmp",
	"temp",
	"deps",
	"pkg",
	"Pods",
	// Wildcard for hidden directories; consumers handle this entry specially.
	".*",
]
/**
 * Reports whether `run` occurs as a contiguous sequence of entries in `segments`.
 *
 * @param segments Path segments to search in
 * @param run Non-empty sequence of segment names to look for
 * @returns true if every entry of `run` matches consecutive entries of `segments`
 */
function containsSegmentRun(segments: readonly string[], run: readonly string[]): boolean {
	for (let start = 0; start + run.length <= segments.length; start++) {
		if (run.every((name, offset) => segments[start + offset] === name)) {
			return true
		}
	}
	return false
}

/**
 * Checks if a file path should be ignored based on the DIRS_TO_IGNORE patterns.
 *
 * Matching is done per path segment, so it works the same for absolute and
 * relative paths and for both "/" and "\" separators:
 * - the special ".*" entry matches any segment that starts with a dot, except
 *   the navigation segments "." and "..";
 * - every other entry matches when its segment(s) appear consecutively anywhere
 *   in the path (e.g. "target/dependency" matches "a/target/dependency/x.jar"
 *   and "target/dependency/x.jar").
 *
 * Every segment is tested, including the last one, so a dot-prefixed file name
 * (e.g. ".DS_Store") is reported as ignored too.
 *
 * @param filePath The file path to check
 * @returns true if the path should be ignored, false otherwise
 */
export function isPathInIgnoredDirectory(filePath: string): boolean {
	// Normalize path separators and drop empty parts (leading, trailing or doubled slashes)
	const segments = filePath
		.replace(/\\/g, "/")
		.split("/")
		.filter((segment) => segment.length > 0)

	// Handle the ".*" pattern for hidden directories. "." and ".." are path
	// navigation, not hidden directories, so they must not trigger a match.
	if (
		DIRS_TO_IGNORE.includes(".*") &&
		segments.some((segment) => segment.startsWith(".") && segment !== "." && segment !== "..")
	) {
		return true
	}

	// Remaining entries are literal names; multi-segment entries are compared
	// segment by segment so they also match at the start or end of the path.
	return DIRS_TO_IGNORE.some((dir) => {
		if (dir === ".*") {
			// Already handled above
			return false
		}
		const patternSegments = dir.split("/").filter((segment) => segment.length > 0)
		return patternSegments.length > 0 && containsSegmentRun(segments, patternSegments)
	})
}