From f3cf4433a57c5f1cb59adb1eb26b837c303197b6 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Tue, 16 Sep 2025 23:48:35 +0000 Subject: [PATCH] fix: handle list_files for paths inside ignored directories - Add checkPathForIgnoredSegments helper to detect if a path contains any ignored directory segments - Modify buildRecursiveArgs to treat paths inside ignored directories (e.g., node_modules/@types/) the same as explicitly targeting those directories - Apply same logic to buildNonRecursiveArgs for consistency - Add comprehensive tests for node_modules/@types scenarios This fixes the issue where list_files would return only directories when targeting paths inside ignored ancestors like node_modules/@types/stream-json. Fixes #4263 --- .../glob/__tests__/list-files.spec.ts | 240 ++++++++++++++++++ src/services/glob/list-files.ts | 111 +++++--- 2 files changed, 311 insertions(+), 40 deletions(-) diff --git a/src/services/glob/__tests__/list-files.spec.ts b/src/services/glob/__tests__/list-files.spec.ts index b987319419..c1557f32e7 100644 --- a/src/services/glob/__tests__/list-files.spec.ts +++ b/src/services/glob/__tests__/list-files.spec.ts @@ -558,3 +558,243 @@ describe("buildRecursiveArgs edge cases", () => { expect(args).not.toContain("--no-ignore") }) }) + +describe("node_modules/@types scenarios", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it("should include files when targeting paths inside node_modules", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + // Simulate files that should be found in node_modules/@types/stream-json + setTimeout(() => { + callback("index.d.ts\n") + callback("Parser.d.ts\n") + callback("StreamBase.d.ts\n") + }, 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + }), + kill: vi.fn(), + } + mockSpawn.mockReturnValue(mockProcess as any) + + // Mock directory listing for node_modules/@types/stream-json + const mockReaddir = vi.fn() + vi.mocked(fs.promises).readdir = mockReaddir + mockReaddir.mockResolvedValueOnce([]) + + // Call listFiles targeting node_modules/@types/stream-json + const [files] = await listFiles("/test/node_modules/@types/stream-json", true, 100) + + // Verify ripgrep was called with correct arguments + const [rgPath, args] = mockSpawn.mock.calls[0] + + // When targeting a path inside node_modules, these flags should be present + expect(args).toContain("--no-ignore-vcs") + expect(args).toContain("--no-ignore") + + // Check for the inclusion patterns + expect(args).toContain("-g") + const gIndex = args.indexOf("-g") + expect(args[gIndex + 1]).toBe("*") + + // Verify that files are included in the results + const fileNames = files.map((f) => path.basename(f)) + expect(fileNames).toContain("index.d.ts") + expect(fileNames).toContain("Parser.d.ts") + expect(fileNames).toContain("StreamBase.d.ts") + }) + + it("should include files when targeting nested paths inside ignored directories", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + // Simulate files in a deeply nested ignored path + setTimeout(() => { + callback("lib/index.js\n") + callback("lib/utils.js\n") + callback("package.json\n") + }, 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + }), + kill: vi.fn(), + } + mockSpawn.mockReturnValue(mockProcess as any) + + // Mock directory listing + const mockReaddir = vi.fn() + vi.mocked(fs.promises).readdir = mockReaddir + mockReaddir.mockResolvedValueOnce([{ name: "lib", isDirectory: () => true, isSymbolicLink: () => false }]) + + // Call listFiles targeting a path deep inside node_modules + const [files] = await listFiles("/test/node_modules/some-package/dist", true, 100) + + // Verify ripgrep was called with correct arguments + const [rgPath, args] = mockSpawn.mock.calls[0] + + // Should have the special flags for ignored paths + expect(args).toContain("--no-ignore-vcs") + expect(args).toContain("--no-ignore") + + // Verify files are included + const fileNames = files.map((f) => path.basename(f)) + expect(fileNames).toContain("index.js") + expect(fileNames).toContain("utils.js") + expect(fileNames).toContain("package.json") + }) + + it("should handle non-recursive listing inside ignored directories", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + // Simulate files at the root of node_modules/@types + setTimeout(() => { + callback("stream-json/\n") + callback("node/\n") + }, 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + }), + kill: vi.fn(), + } + mockSpawn.mockReturnValue(mockProcess as any) + + // Mock directory listing + const mockReaddir = vi.fn() + vi.mocked(fs.promises).readdir = mockReaddir + mockReaddir.mockResolvedValueOnce([ + { name: "stream-json", isDirectory: () => true, isSymbolicLink: () => false }, + { name: "node", isDirectory: () => true, isSymbolicLink: () => false }, + ]) + + // Call listFiles non-recursively inside node_modules/@types + const [files] = await listFiles("/test/node_modules/@types", false, 100) + + // Verify ripgrep was called with correct arguments + const [rgPath, args] = mockSpawn.mock.calls[0] + + // Should have the special flags for ignored paths + expect(args).toContain("--no-ignore-vcs") + expect(args).toContain("--no-ignore") + expect(args).toContain("--maxdepth") + expect(args).toContain("1") + + // Verify directories are included + const dirNames = files + .filter((f) => f.endsWith("/")) + .map((f) => { + const parts = f.split("/") + return parts[parts.length - 2] + "/" + }) + expect(dirNames).toContain("stream-json/") + expect(dirNames).toContain("node/") + }) + + it("should not apply node_modules exclusion to paths containing node_modules", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + setTimeout(() => callback("file.txt\n"), 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + }), + kill: vi.fn(), + } + mockSpawn.mockReturnValue(mockProcess as any) + + // Test with a path containing node_modules + await listFiles("/test/node_modules/package/src", true, 100) + + const [rgPath, args] = mockSpawn.mock.calls[0] + + // Should NOT have the node_modules exclusion pattern since we're inside node_modules + expect(args).not.toContain("!**/node_modules/**") + + // Should have the flags to ignore gitignore rules + expect(args).toContain("--no-ignore-vcs") + expect(args).toContain("--no-ignore") + }) + + it("should handle paths with multiple ignored segments correctly", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + setTimeout(() => callback("output.js\n"), 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + }), + kill: vi.fn(), + } + mockSpawn.mockReturnValue(mockProcess as any) + + // Test with a path containing multiple ignored segments + await listFiles("/test/node_modules/package/dist/bundle", true, 100) + + const [rgPath, args] = mockSpawn.mock.calls[0] + + // Should have the special handling flags + expect(args).toContain("--no-ignore-vcs") + expect(args).toContain("--no-ignore") + + // Should not exclude the first ignored segment found (node_modules) + expect(args).not.toContain("!**/node_modules/**") + + // But should still exclude other ignored directories not in the path + expect(args).toContain("!**/__pycache__/**") + expect(args).toContain("!**/venv/**") + }) +}) diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index 5366bbb84b..3ffcbbf29f 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -223,10 +223,42 @@ function buildRipgrepArgs(dirPath: string, recursive: boolean): string[] { if (recursive) { return [...args, ...buildRecursiveArgs(dirPath), dirPath] } else { - return [...args, ...buildNonRecursiveArgs(), dirPath] + return [...args, ...buildNonRecursiveArgs(dirPath), dirPath] } } +/** + * Check if a path contains any ignored directory segment + * @param dirPath - The directory path to check + * @returns Object with flags indicating if path contains ignored segments + */ +function checkPathForIgnoredSegments(dirPath: string): { + hasHiddenSegment: boolean + ignoredSegment: string | null +} { + // Normalize the path first to handle edge cases + const normalizedPath = path.normalize(dirPath) + // Split by separator and filter out empty parts + // This handles cases like trailing slashes, multiple separators, etc. + const pathParts = normalizedPath.split(path.sep).filter((part) => part.length > 0) + + // Check for hidden directories (starting with .) + const hasHiddenSegment = pathParts.some((part) => part.startsWith(".")) + + // Check if any segment in the path is in the ignore list + let ignoredSegment: string | null = null + for (const part of pathParts) { + // Skip the ".*" pattern as it's handled separately via hasHiddenSegment + const nonHiddenIgnorePatterns = DIRS_TO_IGNORE.filter((pattern) => pattern !== ".*") + if (nonHiddenIgnorePatterns.includes(part)) { + ignoredSegment = part + break + } + } + + return { hasHiddenSegment, ignoredSegment } +} + /** * Build ripgrep arguments for recursive directory traversal */ @@ -236,21 +268,12 @@ function buildRecursiveArgs(dirPath: string): string[] { // In recursive mode, respect .gitignore by default // (ripgrep does this automatically) - // Check if we're explicitly targeting a hidden directory - // Normalize the path first to handle edge cases - const normalizedPath = path.normalize(dirPath) - // Split by separator and filter out empty parts - // This handles cases like trailing slashes, multiple separators, etc. - const pathParts = normalizedPath.split(path.sep).filter((part) => part.length > 0) - const isTargetingHiddenDir = pathParts.some((part) => part.startsWith(".")) - - // Get the target directory name to check if it's in the ignore list - const targetDirName = path.basename(dirPath) - const isTargetInIgnoreList = DIRS_TO_IGNORE.includes(targetDirName) + // Check if the path contains any ignored segments (not just the basename) + const { hasHiddenSegment, ignoredSegment } = checkPathForIgnoredSegments(dirPath) - // If targeting a hidden directory or a directory in the ignore list, - // use special handling to ensure all files are shown - if (isTargetingHiddenDir || isTargetInIgnoreList) { + // If path contains ignored segments (either hidden or explicitly ignored), + // treat it the same as explicitly targeting that ignored directory + if (hasHiddenSegment || ignoredSegment) { args.push("--no-ignore-vcs") args.push("--no-ignore") @@ -261,27 +284,23 @@ function buildRecursiveArgs(dirPath: string): string[] { } // Apply directory exclusions for recursive searches + // But skip exclusions for directories that are in the path we're targeting for (const dir of DIRS_TO_IGNORE) { // Special handling for hidden directories pattern if (dir === ".*") { - // If we're explicitly targeting a hidden directory, don't exclude hidden files/dirs - // This allows the target hidden directory and all its contents to be listed - if (!isTargetingHiddenDir) { - // Not targeting hidden dir: exclude all hidden directories + // If path contains hidden segments, don't exclude hidden files/dirs + if (!hasHiddenSegment) { + // Not targeting path with hidden segments: exclude all hidden directories args.push("-g", `!**/.*/**`) } - // If targeting hidden dir: don't add any exclusion for hidden directories + // If path contains hidden segments: don't add any exclusion for hidden directories continue } - // When explicitly targeting a directory that's in the ignore list (e.g., "temp"), - // we need special handling: - // - Don't add any exclusion pattern for the target directory itself - // - Only exclude nested subdirectories with the same name - // This ensures all files in the target directory are listed, while still - // preventing recursion into nested directories with the same ignored name - if (dir === targetDirName && isTargetInIgnoreList) { - // Skip adding any exclusion pattern - we want to see everything in the target directory + // If this ignored directory is part of our target path, don't exclude it + // This allows listing contents inside ignored ancestors like node_modules/@types/ + if (ignoredSegment === dir) { + // Skip adding any exclusion pattern - we want to see everything in paths containing this segment continue } @@ -295,9 +314,18 @@ function buildRecursiveArgs(dirPath: string): string[] { /** * Build ripgrep arguments for non-recursive directory listing */ -function buildNonRecursiveArgs(): string[] { +function buildNonRecursiveArgs(dirPath: string): string[] { const args: string[] = [] + // Check if the path contains any ignored segments + const { hasHiddenSegment, ignoredSegment } = checkPathForIgnoredSegments(dirPath) + + // If path contains ignored segments, use special handling + if (hasHiddenSegment || ignoredSegment) { + args.push("--no-ignore-vcs") + args.push("--no-ignore") + } + // For non-recursive, limit to the current directory level args.push("-g", "*") args.push("--maxdepth", "1") // ripgrep uses maxdepth, not max-depth @@ -306,17 +334,20 @@ function buildNonRecursiveArgs(): string[] { // (ripgrep respects .gitignore by default) // Apply directory exclusions for non-recursive searches - for (const dir of DIRS_TO_IGNORE) { - if (dir === ".*") { - // For hidden directories in non-recursive mode, we want to show the directories - // themselves but not their contents. Since we're using --maxdepth 1, this - // naturally happens - we just need to avoid excluding the directories entirely. - // We'll let the directory scanning logic handle the visibility. - continue - } else { - // Direct children only - args.push("-g", `!${dir}`) - args.push("-g", `!${dir}/**`) + // But skip exclusions if we're inside an ignored path + if (!hasHiddenSegment && !ignoredSegment) { + for (const dir of DIRS_TO_IGNORE) { + if (dir === ".*") { + // For hidden directories in non-recursive mode, we want to show the directories + // themselves but not their contents. Since we're using --maxdepth 1, this + // naturally happens - we just need to avoid excluding the directories entirely. + // We'll let the directory scanning logic handle the visibility. + continue + } else { + // Direct children only + args.push("-g", `!${dir}`) + args.push("-g", `!${dir}/**`) + } } }