From 777ae1f947d93e6c43895e25d8aa08a297035964 Mon Sep 17 00:00:00 2001 From: X9VoiD Date: Sun, 6 Jul 2025 14:41:49 +0800 Subject: [PATCH 1/6] fix(services/glob): handle nested .gitignores - Rewrote listFiles to use git ls-files for git repos (faster and respects all .gitignores) - Added fallback to manual file walk with ignore package for non-git repos - Optimized recursion to avoid traversing ignored directories - Added comprehensive integration tests - Improved test coverage with new unit tests for edge cases - Fixed path normalization to ensure consistent forward slashes The new implementation: 1. Handles nested .gitignore files correctly 2. Provides significant performance improvements in large repos 3. Maintains compatibility with non-git projects 4. Ensures consistent behavior across environments A caveat of this is that directories by themselves are not reported anymore if they don't contain any files. --- .../__tests__/list-files.integration.spec.ts | 181 +++++++ .../glob/__tests__/list-files.spec.ts | 361 +++++++++---- src/services/glob/list-files.ts | 495 ++++++------------ 3 files changed, 604 insertions(+), 433 deletions(-) create mode 100644 src/services/glob/__tests__/list-files.integration.spec.ts diff --git a/src/services/glob/__tests__/list-files.integration.spec.ts b/src/services/glob/__tests__/list-files.integration.spec.ts new file mode 100644 index 0000000000..db52fea6a3 --- /dev/null +++ b/src/services/glob/__tests__/list-files.integration.spec.ts @@ -0,0 +1,181 @@ +import { describe, it, expect, beforeAll, afterAll } from "vitest" +import os from "os" +import * as path from "path" +import * as fs from "fs" +import simpleGit from "simple-git" +import { listFiles } from "../list-files" +import { getWorkspacePath } from "../../../utils/path" + +vi.mock("../../../utils/path", () => ({ + getWorkspacePath: vi.fn(), + arePathsEqual: (a: string, b: string) => path.resolve(a) === path.resolve(b), +})) + +describe("listFiles integration tests", () => { + let testRepoPath: string + + beforeAll(async () => { + // Create a temporary directory for the Git repo + testRepoPath = path.join(os.tmpdir(), `test-repo-${Date.now()}`) + await fs.promises.mkdir(testRepoPath, { recursive: true }) + + // Initialize a Git repository + const git = simpleGit(testRepoPath) + await git.init() + + // Create root .gitignore + await fs.promises.writeFile(path.join(testRepoPath, ".gitignore"), "root_ignored.txt\nignored_in_root") + + // Create files and directories + await fs.promises.writeFile(path.join(testRepoPath, "root_kept.txt"), "content") + await fs.promises.writeFile(path.join(testRepoPath, "root_ignored.txt"), "content") + + // Subdirectory 1 with its own .gitignore + const subdir1Path = path.join(testRepoPath, "subdir1") + await fs.promises.mkdir(subdir1Path) + await fs.promises.writeFile(path.join(subdir1Path, ".gitignore"), "subdir1_ignored.txt") + await fs.promises.writeFile(path.join(subdir1Path, "subdir1_kept.txt"), "content") + await fs.promises.writeFile(path.join(subdir1Path, "subdir1_ignored.txt"), "content") + + // Subdirectory 2 (should be affected by root .gitignore) + const subdir2Path = path.join(testRepoPath, "subdir2") + await fs.promises.mkdir(subdir2Path) + await fs.promises.writeFile(path.join(subdir2Path, "subdir2_kept.txt"), "content") + await fs.promises.writeFile(path.join(testRepoPath, "ignored_in_root"), "content") + }) + + afterAll(async () => { + // Cleanup the temporary directory + await fs.promises.rm(testRepoPath, { recursive: true, force: true }) + }) + + it("should correctly handle multiple .gitignore files", async () => { + vi.mocked(getWorkspacePath).mockReturnValue(testRepoPath) + + const [files] = await listFiles(testRepoPath, true, 1000) + + const expectedFiles = [ + path.join(testRepoPath, "root_kept.txt"), + path.join(testRepoPath, "subdir1", "subdir1_kept.txt"), + path.join(testRepoPath, "subdir2", "subdir2_kept.txt"), + ].map((p) => p.replace(/\\/g, "/")) + + const normalizedFiles = files.map((p) => p.replace(/\\/g, "/")) + const sortedFiles = normalizedFiles.sort() + const sortedExpected = expectedFiles.sort() + + expect(sortedFiles).toEqual(sortedExpected) + + // Verify that ignored files are not present + expect(normalizedFiles.some((f) => f.endsWith("root_ignored.txt"))).toBe(false) + expect(normalizedFiles.some((f) => f.endsWith("subdir1_ignored.txt"))).toBe(false) + expect(normalizedFiles.some((f) => f.endsWith("ignored_in_root"))).toBe(false) + }) + + it("should list only top-level non-ignored files in root (git repo)", async () => { + vi.mocked(getWorkspacePath).mockReturnValue(testRepoPath) + const [files] = await listFiles(testRepoPath, false, 1000) + const expectedFiles = [ + path.join(testRepoPath, "root_kept.txt"), + path.join(testRepoPath, "subdir1"), + path.join(testRepoPath, "subdir2"), + ].map((p) => p.replace(/\\/g, "/")) + const normalizedFiles = files.map((p) => p.replace(/\\/g, "/")) + const sortedFiles = normalizedFiles.sort() + const sortedExpected = expectedFiles.sort() + expect(sortedFiles).toEqual(sortedExpected) + expect(normalizedFiles.some((f) => f.endsWith("root_ignored.txt"))).toBe(false) + expect(normalizedFiles.some((f) => f.endsWith("ignored_in_root"))).toBe(false) + }) + + it("should list only top-level non-ignored files in subdir1 (git repo, nested)", async () => { + vi.mocked(getWorkspacePath).mockReturnValue(testRepoPath) + const subdir1Path = path.join(testRepoPath, "subdir1") + const [files] = await listFiles(subdir1Path, false, 1000) + const expectedFiles = [path.join(subdir1Path, "subdir1_kept.txt")].map((p) => p.replace(/\\/g, "/")) + const normalizedFiles = files.map((p) => p.replace(/\\/g, "/")) + const sortedFiles = normalizedFiles.sort() + const sortedExpected = expectedFiles.sort() + expect(sortedFiles).toEqual(sortedExpected) + expect(normalizedFiles.some((f) => f.endsWith("subdir1_ignored.txt"))).toBe(false) + }) +}) + +describe("listFiles in non-Git repository", () => { + let testRepoPath: string + + beforeAll(async () => { + // Create a temporary directory that is NOT a Git repo + testRepoPath = path.join(os.tmpdir(), `test-non-git-repo-${Date.now()}`) + await fs.promises.mkdir(testRepoPath, { recursive: true }) + + // Create files and directories + await fs.promises.writeFile(path.join(testRepoPath, "root_kept.txt"), "content") + + // Subdirectory 1 + const subdir1Path = path.join(testRepoPath, "subdir1") + await fs.promises.mkdir(subdir1Path) + await fs.promises.writeFile(path.join(subdir1Path, "subdir1_kept.txt"), "content") + + // Subdirectory 2 + const subdir2Path = path.join(testRepoPath, "subdir2") + await fs.promises.mkdir(subdir2Path) + await fs.promises.writeFile(path.join(subdir2Path, "subdir2_kept.txt"), "content") + + vi.mocked(getWorkspacePath).mockReturnValue(testRepoPath) + }) + + afterAll(async () => { + // Cleanup the temporary directory + await fs.promises.rm(testRepoPath, { recursive: true, force: true }) + }) + + it("should fall back to manual file search when not a git repository", async () => { + const [files] = await listFiles(testRepoPath, true, 1000) + + const expectedFiles = [ + path.join(testRepoPath, "root_kept.txt"), + path.join(testRepoPath, "subdir1", "subdir1_kept.txt"), + path.join(testRepoPath, "subdir2", "subdir2_kept.txt"), + ].map((p) => p.replace(/\\/g, "/")) + + const normalizedFiles = files.map((p) => p.replace(/\\/g, "/")) + const sortedFiles = normalizedFiles.sort() + const sortedExpected = expectedFiles.sort() + + expect(sortedFiles).toEqual(sortedExpected) + + // Verify that ignored files are not present + expect(normalizedFiles.some((f) => f.endsWith("root_ignored.txt"))).toBe(false) + expect(normalizedFiles.some((f) => f.endsWith("subdir1_ignored.txt"))).toBe(false) + expect(normalizedFiles.some((f) => f.endsWith("ignored_in_root"))).toBe(false) + }) + + it("should list only top-level non-ignored files in root (no git)", async () => { + vi.mocked(getWorkspacePath).mockReturnValue(testRepoPath) + const [files] = await listFiles(testRepoPath, false, 1000) + const expectedFiles = [ + path.join(testRepoPath, "root_kept.txt"), + path.join(testRepoPath, "subdir1"), + path.join(testRepoPath, "subdir2"), + ].map((p) => p.replace(/\\/g, "/")) + const normalizedFiles = files.map((p) => p.replace(/\\/g, "/")) + const sortedFiles = normalizedFiles.sort() + const sortedExpected = expectedFiles.sort() + expect(sortedFiles).toEqual(sortedExpected) + expect(normalizedFiles.some((f) => f.endsWith("root_ignored.txt"))).toBe(false) + expect(normalizedFiles.some((f) => f.endsWith("ignored_in_root"))).toBe(false) + }) + + it("should list only top-level non-ignored files in subdir1 (no git, nested)", async () => { + vi.mocked(getWorkspacePath).mockReturnValue(testRepoPath) + const subdir1Path = path.join(testRepoPath, "subdir1") + const [files] = await listFiles(subdir1Path, false, 1000) + const expectedFiles = [path.join(subdir1Path, "subdir1_kept.txt")].map((p) => p.replace(/\\/g, "/")) + const normalizedFiles = files.map((p) => p.replace(/\\/g, "/")) + const sortedFiles = normalizedFiles.sort() + const sortedExpected = expectedFiles.sort() + expect(sortedFiles).toEqual(sortedExpected) + expect(normalizedFiles.some((f) => f.endsWith("subdir1_ignored.txt"))).toBe(false) + }) +}) diff --git a/src/services/glob/__tests__/list-files.spec.ts b/src/services/glob/__tests__/list-files.spec.ts index 7797e40726..6cae67f1ec 100644 --- a/src/services/glob/__tests__/list-files.spec.ts +++ b/src/services/glob/__tests__/list-files.spec.ts @@ -1,123 +1,294 @@ -import { vi, describe, it, expect, beforeEach } from "vitest" +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest" +import os from "os" import * as path from "path" +import simpleGit from "simple-git" +import * as fs from "fs" -// Mock ripgrep to avoid filesystem dependencies -vi.mock("../../ripgrep", () => ({ - getBinPath: vi.fn().mockResolvedValue("/mock/path/to/rg"), -})) +import { listFiles } from "../list-files" +import { getWorkspacePath, arePathsEqual } from "../../../utils/path" -// Mock vscode -vi.mock("vscode", () => ({ - env: { - appRoot: "/mock/app/root", - }, +// Mock modules +vi.mock("simple-git", () => ({ + __esModule: true, + default: vi.fn(() => ({ + checkIgnore: vi.fn(), + })), })) -// Mock filesystem operations -vi.mock("fs", () => ({ - promises: { - access: vi.fn().mockRejectedValue(new Error("Not found")), - readFile: vi.fn().mockResolvedValue(""), - readdir: vi.fn().mockResolvedValue([]), - }, -})) +vi.mock("fs", () => { + return { + promises: { + readdir: vi.fn(), + }, + existsSync: vi.fn(), + readFileSync: vi.fn(), + } +}) -vi.mock("child_process", () => ({ - spawn: vi.fn(), +vi.mock("../../../utils/path", () => ({ + getWorkspacePath: vi.fn(), + arePathsEqual: vi.fn(), })) -vi.mock("../../path", () => ({ - arePathsEqual: vi.fn().mockReturnValue(false), -})) +function normalizePath(p: string): string { + return p.replace(/\\/g, "/") +} -import { listFiles } from "../list-files" -import * as childProcess from "child_process" +describe("listFiles", () => { + const workspacePath = path.join(os.tmpdir(), "test-workspace") + + // Get references to the mocked functions + const mockReaddir = vi.mocked(fs.promises.readdir) + const mockSimpleGit = vi.mocked(simpleGit) + const mockCheckIgnore = vi.fn() -describe("list-files symlink support", () => { beforeEach(() => { + // Setup mocks + vi.mocked(getWorkspacePath).mockReturnValue(workspacePath) + mockSimpleGit.mockReturnValue({ + checkIgnore: mockCheckIgnore, + } as any) + + // Default: no .gitignore exists + vi.mocked(fs.existsSync).mockImplementation((p) => false) + vi.mocked(fs.readFileSync).mockImplementation((p) => "") + + // Clear mocks + mockSimpleGit.mockClear() + vi.mocked(getWorkspacePath).mockClear() + vi.mocked(arePathsEqual).mockClear() + mockCheckIgnore.mockClear() + mockReaddir.mockClear() + }) + + afterEach(() => { vi.clearAllMocks() }) - it("should include --follow flag in ripgrep arguments", async () => { - const mockSpawn = vi.mocked(childProcess.spawn) - const mockProcess = { - stdout: { - on: vi.fn((event, callback) => { - if (event === "data") { - // Simulate some output to complete the process - setTimeout(() => callback("test-file.txt\n"), 10) - } - }), - }, - stderr: { - on: vi.fn(), - }, - on: vi.fn((event, callback) => { - if (event === "close") { - setTimeout(() => callback(0), 20) - } - if (event === "error") { - // No error simulation - } - }), - kill: vi.fn(), - } + describe("special directories", () => { + it("returns root directory on Windows without listing", async () => { + const root = "C:\\" + vi.mocked(arePathsEqual).mockImplementation((a, b) => a === b) + + const [files, limitReached] = await listFiles(root, true, 100) + const expected = [normalizePath(path.resolve(root))] + expect(files).toEqual(expected) + expect(limitReached).toBe(false) + }) - mockSpawn.mockReturnValue(mockProcess as any) + it("returns root directory on POSIX without listing", async () => { + const root = "/" + vi.mocked(arePathsEqual).mockImplementation((a, b) => { + return path.resolve(a as string) === path.resolve(b as string) + }) + // Ensure readdir is not called for root + mockReaddir.mockResolvedValue([]) - // Call listFiles to trigger ripgrep execution - await listFiles("/test/dir", false, 100) + const [files, limitReached] = await listFiles(root, true, 100) + const expected = [normalizePath(path.resolve(root))] + expect(files).toEqual(expected) + expect(limitReached).toBe(false) + expect(mockReaddir).not.toHaveBeenCalled() + }) - // Verify that spawn was called with --follow flag (the critical fix) - const [rgPath, args] = mockSpawn.mock.calls[0] - expect(rgPath).toBe("/mock/path/to/rg") - expect(args).toContain("--files") - expect(args).toContain("--hidden") - expect(args).toContain("--follow") // This is the critical assertion - the fix should add this flag + it("returns home directory without listing", async () => { + const home = os.homedir() + vi.mocked(arePathsEqual).mockReturnValueOnce(false) // Not root + vi.mocked(arePathsEqual).mockReturnValueOnce(true) // Home - // Platform-agnostic path check - verify the last argument is the resolved path - const expectedPath = path.resolve("/test/dir") - expect(args[args.length - 1]).toBe(expectedPath) + const [files, limitReached] = await listFiles(home, true, 100) + const expected = [normalizePath(path.resolve(home))] + expect(files).toEqual(expected) + expect(limitReached).toBe(false) + }) + + it("should ignore files in directories specified by DIRS_TO_IGNORE", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = [ + { name: "node_modules", isDirectory: () => true }, + { name: "regular.js", isDirectory: () => false }, + ] + mockReaddir.mockResolvedValueOnce(entries as any) + mockCheckIgnore.mockResolvedValue([]) + + const [files] = await listFiles(dirPath, true, 100) + expect(files).toContain(normalizePath(path.join(dirPath, "regular.js"))) + expect(files.some((f) => f.includes("node_modules"))).toBe(false) + // Verify readdir was not called on the ignored directory + expect(mockReaddir).toHaveBeenCalledTimes(1) + }) }) - it("should include --follow flag for recursive listings too", async () => { - const mockSpawn = vi.mocked(childProcess.spawn) - const mockProcess = { - stdout: { - on: vi.fn((event, callback) => { - if (event === "data") { - setTimeout(() => callback("test-file.txt\n"), 10) - } - }), - }, - stderr: { - on: vi.fn(), - }, - on: vi.fn((event, callback) => { - if (event === "close") { - setTimeout(() => callback(0), 20) + describe("non-special directories", () => { + it("lists top-level files and directories non-recursively", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = [ + { name: "file1.txt", isDirectory: () => false }, + { name: "dir1", isDirectory: () => true }, + ] + mockReaddir.mockResolvedValue(entries as any) + mockCheckIgnore.mockResolvedValue([]) + + const [files, limitReached] = await listFiles(dirPath, false, 100) + expect(files).toEqual([ + normalizePath(path.join(dirPath, "file1.txt")), + normalizePath(path.join(dirPath, "dir1")), + ]) + expect(limitReached).toBe(false) + }) + + it("lists files recursively", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = [ + { name: "file1.txt", isDirectory: () => false }, + { name: "subdir", isDirectory: () => true }, + ] + const subEntries = [{ name: "file2.txt", isDirectory: () => false }] + mockReaddir + .mockResolvedValueOnce(entries as any) // Top level + .mockResolvedValueOnce(subEntries as any) // subdir + mockCheckIgnore.mockResolvedValue([]) + + const [files, limitReached] = await listFiles(dirPath, true, 100) + expect(files.sort()).toEqual( + [ + normalizePath(path.join(dirPath, "file1.txt")), + normalizePath(path.join(dirPath, "subdir", "file2.txt")), + ].sort(), + ) + expect(limitReached).toBe(false) + }) + + it("filters git-ignored files", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = [ + { name: "file1.txt", isDirectory: () => false }, + { name: "ignored.txt", isDirectory: () => false }, + ] + mockReaddir.mockResolvedValue(entries as any) + mockCheckIgnore.mockResolvedValue([path.join("test-dir", "ignored.txt")]) + + // Simulate .gitignore exists and ignores ignored.txt + vi.mocked(fs.existsSync).mockImplementation((p) => { + return p.toString().endsWith(".gitignore") + }) + vi.mocked(fs.readFileSync).mockImplementation((p) => { + if (p.toString().endsWith(".gitignore")) { + return "ignored.txt" } - if (event === "error") { - // No error simulation + return "" + }) + + const [files, limitReached] = await listFiles(dirPath, false, 100) + expect(files).toEqual([normalizePath(path.join(dirPath, "file1.txt"))]) + expect(limitReached).toBe(false) + }) + + it("respects the limit", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = Array(10) + .fill(0) + .map((_, i) => ({ + name: `file${i}.txt`, + isDirectory: () => false, + })) + mockReaddir.mockResolvedValue(entries as any) + mockCheckIgnore.mockResolvedValue([]) + + const [files, limitReached] = await listFiles(dirPath, false, 5) + expect(files.every((f) => path.isAbsolute(f))).toBe(true) + expect(files).toHaveLength(5) + expect(limitReached).toBe(true) + }) + + it("handles empty directories", async () => { + const dirPath = path.join(workspacePath, "empty-dir") + mockReaddir.mockResolvedValue([]) + mockCheckIgnore.mockResolvedValue([]) + const [files, limitReached] = await listFiles(dirPath, true, 100) + expect(files).toEqual([]) + expect(limitReached).toBe(false) + }) + + it("handles directory with only ignored files", async () => { + const dirPath = path.join(workspacePath, "ignored-only") + const entries = [{ name: "ignored.txt", isDirectory: () => false }] + mockReaddir.mockResolvedValue(entries as any) + mockCheckIgnore.mockResolvedValue([path.join("ignored-only", "ignored.txt")]) + + // Simulate .gitignore exists and ignores ignored.txt + vi.mocked(fs.existsSync).mockImplementation((p) => { + return p.toString().endsWith(".gitignore") + }) + vi.mocked(fs.readFileSync).mockImplementation((p) => { + if (p.toString().endsWith(".gitignore")) { + return "ignored.txt" } - }), - kill: vi.fn(), - } + return "" + }) - mockSpawn.mockReturnValue(mockProcess as any) + const [files, limitReached] = await listFiles(dirPath, false, 100) + expect(files).toEqual([]) + expect(limitReached).toBe(false) + }) - // Call listFiles with recursive=true - await listFiles("/test/dir", true, 100) + it("does not traverse into git-ignored directories for performance", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = [ + { name: "dist", isDirectory: () => true }, + { name: "file.txt", isDirectory: () => false }, + ] + mockReaddir.mockResolvedValueOnce(entries as any) + mockCheckIgnore.mockResolvedValue([path.join("test-dir", "dist")]) + + const [files] = await listFiles(dirPath, true, 100) + + expect(files).toEqual([normalizePath(path.join(dirPath, "file.txt"))]) + // Check that readdir was only called for the top-level directory + expect(mockReaddir).toHaveBeenCalledTimes(1) + }) + + it("should return paths with forward slashes, relative to workspace", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = [{ name: "file.txt", isDirectory: () => false }] + mockReaddir.mockResolvedValue(entries as any) + mockCheckIgnore.mockResolvedValue([]) + + const [files] = await listFiles(dirPath, false, 100) + + expect(files).toEqual([normalizePath(path.join(dirPath, "file.txt"))]) + }) + }) + + describe("error handling", () => { + it("skips unreadable directories during recursion", async () => { + const dirPath = path.join(workspacePath, "test-dir") + const entries = [ + { name: "unreadable", isDirectory: () => true }, + { name: "file.txt", isDirectory: () => false }, + ] + mockReaddir.mockResolvedValueOnce(entries as any) + // Mock the error for the unreadable directory + mockReaddir.mockRejectedValueOnce(new Error("Permission denied")) + mockCheckIgnore.mockResolvedValue([]) + + // Simulate .gitignore exists but does not ignore anything + vi.mocked(fs.existsSync).mockImplementation((p) => { + return p.toString().endsWith(".gitignore") + }) + vi.mocked(fs.readFileSync).mockImplementation((p) => { + if (p.toString().endsWith(".gitignore")) { + return "" + } + return "" + }) - // Verify that spawn was called with --follow flag (the critical fix) - const [rgPath, args] = mockSpawn.mock.calls[0] - expect(rgPath).toBe("/mock/path/to/rg") - expect(args).toContain("--files") - expect(args).toContain("--hidden") - expect(args).toContain("--follow") // This should be present in recursive mode too + const [files] = await listFiles(dirPath, true, 100) - // Platform-agnostic path check - verify the last argument is the resolved path - const expectedPath = path.resolve("/test/dir") - expect(args[args.length - 1]).toBe(expectedPath) + // Should still return the readable file (not the unreadable directory) + expect(files.sort()).toEqual([normalizePath(path.join(dirPath, "file.txt"))].sort()) + // Should have tried to read both directories + expect(mockReaddir).toHaveBeenCalledTimes(2) + }) }) }) diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index 0dc2a06f1e..d252aa1492 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -1,59 +1,65 @@ import os from "os" import * as path from "path" import * as fs from "fs" -import * as childProcess from "child_process" -import * as vscode from "vscode" -import { arePathsEqual } from "../../utils/path" -import { getBinPath } from "../../services/ripgrep" -import { DIRS_TO_IGNORE } from "./constants" +import simpleGit, { SimpleGit } from "simple-git" +import { globSync } from "glob" +import ignore from "ignore" +import { arePathsEqual, getWorkspacePath } from "../../utils/path" +import { isPathInIgnoredDirectory } from "./ignore-utils" + +function normalizePath(p: string): string { + return p.replace(/\\/g, "/") +} /** - * List files in a directory, with optional recursive traversal + * List files and directories in a given path, with options for recursion and limits. + * This function respects .gitignore rules and a hardcoded list of ignored directory names. * - * @param dirPath - Directory path to list files from - * @param recursive - Whether to recursively list files in subdirectories - * @param limit - Maximum number of files to return - * @returns Tuple of [file paths array, whether the limit was reached] + * @param dirPath - Directory path to list files from. + * @param recursive - Whether to recursively list files in subdirectories. + * @param limit - Maximum number of files to return. + * @returns A tuple containing an array of file paths and a boolean indicating if the limit was reached. */ export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> { - // Handle special directories - const specialResult = await handleSpecialDirectories(dirPath) + const workspacePath = getWorkspacePath() + const absoluteDirPath = path.resolve(dirPath) + const specialResult = handleSpecialDirectories(absoluteDirPath) if (specialResult) { - return specialResult + // Normalizing path for consistency before returning. + const normalizedPaths = specialResult[0].map(normalizePath) + return [normalizedPaths, specialResult[1]] } - // Get ripgrep path - const rgPath = await getRipgrepPath() + const git = simpleGit(workspacePath) + let allAbsolutePaths: string[] - // Get files using ripgrep - const files = await listFilesWithRipgrep(rgPath, dirPath, recursive, limit) + if (recursive) { + allAbsolutePaths = await listAllFilesRecursively(absoluteDirPath, git, workspacePath, limit + 1) + } else { + allAbsolutePaths = await listNonRecursive(absoluteDirPath, git, workspacePath) + } - // Get directories with proper filtering - const gitignorePatterns = await parseGitignoreFile(dirPath, recursive) - const directories = await listFilteredDirectories(dirPath, recursive, gitignorePatterns) + // Filter out any empty strings and apply the custom ignore list as a final pass. + const finalPaths = allAbsolutePaths.filter((p) => p && !isPathInIgnoredDirectory(p)).map(normalizePath) - // Combine and format the results - return formatAndCombineResults(files, directories, limit) + const trimmed = finalPaths.slice(0, limit) + return [trimmed, finalPaths.length > limit] } /** - * Handle special directories (root, home) that should not be fully listed + * Handle special directories (root, home) that should not be fully listed. + * This is a synchronous function as it doesn't perform I/O. */ -async function handleSpecialDirectories(dirPath: string): Promise<[string[], boolean] | null> { - const absolutePath = path.resolve(dirPath) - +function handleSpecialDirectories(absolutePath: string): [string[], boolean] | null { // Do not allow listing files in root directory - const root = process.platform === "win32" ? path.parse(absolutePath).root : "/" - const isRoot = arePathsEqual(absolutePath, root) - if (isRoot) { - return [[root], false] + if (arePathsEqual(path.dirname(absolutePath), absolutePath)) { + return [[absolutePath], false] } // Do not allow listing files in home directory const homeDir = os.homedir() - const isHomeDir = arePathsEqual(absolutePath, homeDir) - if (isHomeDir) { + if (arePathsEqual(absolutePath, homeDir)) { return [[homeDir], false] } @@ -61,349 +67,162 @@ async function handleSpecialDirectories(dirPath: string): Promise<[string[], boo } /** - * Get the path to the ripgrep binary - */ -async function getRipgrepPath(): Promise { - const vscodeAppRoot = vscode.env.appRoot - const rgPath = await getBinPath(vscodeAppRoot) - - if (!rgPath) { - throw new Error("Could not find ripgrep binary") - } - - return rgPath -} - -/** - * List files using ripgrep with appropriate arguments + * Recursively lists all files using the highly optimized `git ls-files` command. + * Falls back to a manual filesystem walk if the git command fails. */ -async function listFilesWithRipgrep( - rgPath: string, - dirPath: string, - recursive: boolean, +async function listAllFilesRecursively( + dir: string, + git: SimpleGit, + workspacePath: string, limit: number, ): Promise { - const absolutePath = path.resolve(dirPath) - const rgArgs = buildRipgrepArgs(absolutePath, recursive) - return execRipgrep(rgPath, rgArgs, limit) -} - -/** - * Build appropriate ripgrep arguments based on whether we're doing a recursive search - */ -function buildRipgrepArgs(dirPath: string, recursive: boolean): string[] { - // Base arguments to list files - const args = ["--files", "--hidden", "--follow"] - - if (recursive) { - return [...args, ...buildRecursiveArgs(), dirPath] - } else { - return [...args, ...buildNonRecursiveArgs(), dirPath] - } -} - -/** - * Build ripgrep arguments for recursive directory traversal - */ -function buildRecursiveArgs(): string[] { - const args: string[] = [] - - // In recursive mode, respect .gitignore by default - // (ripgrep does this automatically) - - // Apply directory exclusions for recursive searches - for (const dir of DIRS_TO_IGNORE) { - args.push("-g", `!**/${dir}/**`) - } - - return args -} - -/** - * Build ripgrep arguments for non-recursive directory listing - */ -function buildNonRecursiveArgs(): string[] { - const args: string[] = [] - - // For non-recursive, limit to the current directory level - args.push("-g", "*") - args.push("--maxdepth", "1") // ripgrep uses maxdepth, not max-depth - - // Don't respect .gitignore in non-recursive mode (consistent with original behavior) - args.push("--no-ignore-vcs") - - // Apply directory exclusions for non-recursive searches - for (const dir of DIRS_TO_IGNORE) { - if (dir === ".*") { - // For hidden files/dirs in non-recursive mode - args.push("-g", "!.*") - } else { - // Direct children only - args.push("-g", `!${dir}`) - args.push("-g", `!${dir}/**`) + try { + // Use git ls-files for a massive performance boost. + // --cached: All files tracked by git. + // --others: All untracked files. + // --exclude-standard: Respects .gitignore, .git/info/exclude, and global git config. + const relativeDir = path.relative(workspacePath, dir) + const args = ["ls-files", "--cached", "--others", "--exclude-standard"] + // Scope the search to the target directory for efficiency + if (relativeDir) { + args.push(relativeDir) } - } - - return args -} - -/** - * Parse the .gitignore file if it exists and is relevant - */ -async function parseGitignoreFile(dirPath: string, recursive: boolean): Promise { - if (!recursive) { - return [] // Only needed for recursive mode - } - - const absolutePath = path.resolve(dirPath) - const gitignorePath = path.join(absolutePath, ".gitignore") - try { - // Check if .gitignore exists - const exists = await fs.promises - .access(gitignorePath) - .then(() => true) - .catch(() => false) + const result = await git.raw(args) - if (!exists) { - return [] - } + if (!result) return [] - // Read and parse .gitignore file - const content = await fs.promises.readFile(gitignorePath, "utf8") - return content + return result .split("\n") - .map((line) => line.trim()) - .filter((line) => line && !line.startsWith("#")) - } catch (err) { - console.warn(`Error reading .gitignore: ${err}`) - return [] // Continue without gitignore patterns on error + .filter(Boolean) // Filter out empty lines + .map((p) => path.join(workspacePath, p)) + } catch (error) { + // Fallback to the manual method if git ls-files fails + // (e.g., not a git repo, or an error with the command). + console.warn("`git ls-files` failed, falling back to manual file search:", error) + return listAllFilesRecursivelyWithFs(dir, git, workspacePath, limit) } } /** - * List directories with appropriate filtering + * A fallback recursive file lister that manually walks the filesystem. + * This is slower but works if Git is not available or fails. */ -async function listFilteredDirectories( - dirPath: string, - recursive: boolean, - gitignorePatterns: string[], +async function listAllFilesRecursivelyWithFs( + dir: string, + _git: SimpleGit, + workspacePath: string, + limit: number, ): Promise { - const absolutePath = path.resolve(dirPath) - const directories: string[] = [] + const result: string[] = [] + const queue: string[] = [dir] - async function scanDirectory(currentPath: string): Promise { - try { - // List all entries in the current directory - const entries = await fs.promises.readdir(currentPath, { withFileTypes: true }) + // Create ignore instance using all .gitignore files in the workspace + const ig = createGitignoreFilter(workspacePath) + + while (queue.length > 0 && result.length < limit) { + const currentDir = queue.shift()! - // Filter for directories only, excluding symbolic links to prevent circular traversal - for (const entry of entries) { - if (entry.isDirectory() && !entry.isSymbolicLink()) { - const dirName = entry.name - const fullDirPath = path.join(currentPath, dirName) + // Pre-check if the directory itself is ignored by custom ignore logic + if (isPathInIgnoredDirectory(currentDir)) { + continue + } - // Check if this directory should be included - if (shouldIncludeDirectory(dirName, recursive, gitignorePatterns)) { - // Add the directory to our results (with trailing slash) - const formattedPath = fullDirPath.endsWith("/") ? fullDirPath : `${fullDirPath}/` - directories.push(formattedPath) + let entries: fs.Dirent[] + try { + entries = await fs.promises.readdir(currentDir, { withFileTypes: true }) + } catch (err) { + continue // Skip unreadable directories + } - // If recursive mode and not a ignored directory, scan subdirectories - if (recursive && !isDirectoryExplicitlyIgnored(dirName)) { - await scanDirectory(fullDirPath) - } - } + for (const entry of entries) { + const fullPath = path.join(currentDir, entry.name) + const relPath = path.relative(workspacePath, fullPath).replace(/\\/g, "/") + if (ig.ignores(relPath)) { + if (entry.isDirectory()) { + // Still need to recurse into subdirectories to find non-ignored children + continue + } + continue + } + if (entry.isDirectory()) { + queue.push(fullPath) + } else { + result.push(fullPath) + if (result.length >= limit) { + break } } - } catch (err) { - // Silently continue if we can't read a directory - console.warn(`Could not read directory ${currentPath}: ${err}`) } } - - // Start scanning from the root directory - await scanDirectory(absolutePath) - - return directories + return result } /** - * Determine if a directory should be included in results based on filters + * Creates a filter function that respects nested .gitignore files. + * @param {string} rootDir - The root directory to start scanning from. + * @returns {import('ignore').Ignore} An ignore instance pre-populated with rules. */ -function shouldIncludeDirectory(dirName: string, recursive: boolean, gitignorePatterns: string[]): boolean { - // Skip hidden directories if configured to ignore them - if (dirName.startsWith(".") && DIRS_TO_IGNORE.includes(".*")) { - return false - } - - // Check against explicit ignore patterns - if (isDirectoryExplicitlyIgnored(dirName)) { - return false - } +function createGitignoreFilter(rootDir: string) { + const ig = ignore() + + // Find all .gitignore files recursively + const gitignoreFiles = globSync("**/.gitignore", { + cwd: rootDir, + dot: true, + ignore: ["**/node_modules/**"], + }) - // Check against gitignore patterns in recursive mode - if (recursive && gitignorePatterns.length > 0 && isIgnoredByGitignore(dirName, gitignorePatterns)) { - return false + // Add rules from the root .gitignore first, if it exists + const rootGitignorePath = path.join(rootDir, ".gitignore") + if (fs.existsSync(rootGitignorePath)) { + const rootGitignoreContent = fs.readFileSync(rootGitignorePath, "utf8") + ig.add(rootGitignoreContent) } - return true -} - -/** - * Check if a directory is in our explicit ignore list - */ -function isDirectoryExplicitlyIgnored(dirName: string): boolean { - for (const pattern of DIRS_TO_IGNORE) { - // Exact name matching - if (pattern === dirName) { - return true - } - - // Path patterns that contain / - if (pattern.includes("/")) { - const pathParts = pattern.split("/") - if (pathParts[0] === dirName) { - return true - } + // Process nested .gitignore files + for (const gitignorePath of gitignoreFiles) { + // We already handled the root one + if (path.normalize(gitignorePath) === ".gitignore") { + continue } - } - return false -} + const gitignoreContent = fs.readFileSync(path.join(rootDir, gitignorePath), "utf8") + const gitignoreDir = path.dirname(gitignorePath) + + // Parse lines and make patterns relative to the rootDir + const patterns = gitignoreContent + .split(/\r?\n/) + .filter((line) => line.trim() !== "" && !line.startsWith("#")) + .map((pattern) => { + const isNegated = pattern.startsWith("!") + if (isNegated) { + pattern = pattern.slice(1) + } + const fullPattern = path.join(gitignoreDir, pattern).replace(/\\/g, "/") + return isNegated ? `!${fullPattern}` : fullPattern + }) -/** - * Check if a directory matches any gitignore patterns - */ -function isIgnoredByGitignore(dirName: string, gitignorePatterns: string[]): boolean { - for (const pattern of gitignorePatterns) { - // Directory patterns (ending with /) - if (pattern.endsWith("/")) { - const dirPattern = pattern.slice(0, -1) - if (dirName === dirPattern) { - return true - } - if (pattern.startsWith("**/") && dirName === dirPattern.slice(3)) { - return true - } - } - // Simple name patterns - else if (dirName === pattern) { - return true - } - // Wildcard patterns - else if (pattern.includes("*")) { - const regexPattern = pattern.replace(/\\/g, "\\\\").replace(/\./g, "\\.").replace(/\*/g, ".*") - const regex = new RegExp(`^${regexPattern}$`) - if (regex.test(dirName)) { - return true - } - } + ig.add(patterns) } - return false -} - -/** - * Combine file and directory results and format them properly - */ -function formatAndCombineResults(files: string[], directories: string[], limit: number): [string[], boolean] { - // Combine file paths with directory paths - const allPaths = [...directories, ...files] - - // Deduplicate paths (a directory might appear in both lists) - const uniquePathsSet = new Set(allPaths) - const uniquePaths = Array.from(uniquePathsSet) - - // Sort to ensure directories come first, followed by files - uniquePaths.sort((a: string, b: string) => { - const aIsDir = a.endsWith("/") - const bIsDir = b.endsWith("/") - - if (aIsDir && !bIsDir) return -1 - if (!aIsDir && bIsDir) return 1 - return a.localeCompare(b) - }) - - const trimmedPaths = uniquePaths.slice(0, limit) - return [trimmedPaths, trimmedPaths.length >= limit] + return ig } /** - * Execute ripgrep command and return list of files + * List only top-level files and directories, filtering out ignored ones. */ -async function execRipgrep(rgPath: string, args: string[], limit: number): Promise { - return new Promise((resolve, reject) => { - const rgProcess = childProcess.spawn(rgPath, args) - let output = "" - let results: string[] = [] - - // Set timeout to avoid hanging - const timeoutId = setTimeout(() => { - rgProcess.kill() - console.warn("ripgrep timed out, returning partial results") - resolve(results.slice(0, limit)) - }, 10_000) - - // Process stdout data as it comes in - rgProcess.stdout.on("data", (data) => { - output += data.toString() - processRipgrepOutput() - - // Kill the process if we've reached the limit - if (results.length >= limit) { - rgProcess.kill() - clearTimeout(timeoutId) // Clear the timeout when we kill the process due to reaching the limit - } - }) - - // Process stderr but don't fail on non-zero exit codes - rgProcess.stderr.on("data", (data) => { - console.error(`ripgrep stderr: ${data}`) - }) - - // Handle process completion - rgProcess.on("close", (code) => { - // Clear the timeout to avoid memory leaks - clearTimeout(timeoutId) - - // Process any remaining output - processRipgrepOutput(true) - - // Log non-zero exit codes but don't fail - if (code !== 0 && code !== null && code !== 143 /* SIGTERM */) { - console.warn(`ripgrep process exited with code ${code}, returning partial results`) - } - - resolve(results.slice(0, limit)) - }) - - // Handle process errors - rgProcess.on("error", (error) => { - // Clear the timeout to avoid memory leaks - clearTimeout(timeoutId) - reject(new Error(`ripgrep process error: ${error.message}`)) - }) - - // Helper function to process output buffer - function processRipgrepOutput(isFinal = false) { - const lines = output.split("\n") - - // Keep the last incomplete line unless this is the final processing - if (!isFinal) { - output = lines.pop() || "" - } else { - output = "" - } - - // Process each complete line - for (const line of lines) { - if (line.trim() && results.length < limit) { - results.push(line) - } else if (results.length >= limit) { - break - } - } +async function listNonRecursive(dir: string, _git: SimpleGit, workspacePath: string): Promise { + const entries = await fs.promises.readdir(dir, { withFileTypes: true }) + const ig = createGitignoreFilter(workspacePath) + + const result: string[] = [] + for (const entry of entries) { + const fullPath = path.join(dir, entry.name) + const relPath = path.relative(workspacePath, fullPath).replace(/\\/g, "/") + if (!ig.ignores(relPath) && !isPathInIgnoredDirectory(fullPath)) { + result.push(fullPath) } - }) + } + return result } From d1828091ada1441cfb4a5241519d62800dc768da Mon Sep 17 00:00:00 2001 From: X9VoiD Date: Sun, 6 Jul 2025 14:49:28 +0800 Subject: [PATCH 2/6] fix(glob): assert if listFiles truly skipped --- src/services/glob/__tests__/list-files.spec.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/services/glob/__tests__/list-files.spec.ts b/src/services/glob/__tests__/list-files.spec.ts index 74179ab593..661b42c237 100644 --- a/src/services/glob/__tests__/list-files.spec.ts +++ b/src/services/glob/__tests__/list-files.spec.ts @@ -297,6 +297,7 @@ describe("listFiles", () => { it("should return empty array immediately when limit is 0", async () => { const result = await listFiles("/test/path", true, 0) + expect(getWorkspacePath).toHaveBeenCalledTimes(0) expect(result).toEqual([[], false]) }) }) From 3bc50ea3e6d118f45540fe271f1ed683f9ff1f57 Mon Sep 17 00:00:00 2001 From: X9VoiD Date: Sun, 6 Jul 2025 09:58:44 +0000 Subject: [PATCH 3/6] Skip OS-specific tests when not correct OS --- src/services/glob/__tests__/list-files.spec.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/services/glob/__tests__/list-files.spec.ts b/src/services/glob/__tests__/list-files.spec.ts index 661b42c237..f5c223b154 100644 --- a/src/services/glob/__tests__/list-files.spec.ts +++ b/src/services/glob/__tests__/list-files.spec.ts @@ -66,7 +66,9 @@ describe("listFiles", () => { }) describe("special directories", () => { - it("returns root directory on Windows without listing", async () => { + it("returns root directory on Windows without listing", async (t) => { + t.skip(process.platform !== "win32", "Skipping Windows-specific test on non-Windows environment") + const root = "C:\\" vi.mocked(arePathsEqual).mockImplementation((a, b) => a === b) @@ -76,7 +78,12 @@ describe("listFiles", () => { expect(limitReached).toBe(false) }) - it("returns root directory on POSIX without listing", async () => { + it("returns root directory on POSIX without listing", async (t) => { + t.skip( + process.platform !== "linux" && process.platform !== "darwin", + "Skipping POSIX-specific test on non-Linux environment", + ) + const root = "/" vi.mocked(arePathsEqual).mockImplementation((a, b) => { return path.resolve(a as string) === path.resolve(b as string) From 4273b049b66f0f8cc0813d3fbd38604355caad1f Mon Sep 17 00:00:00 2001 From: X9VoiD Date: Sun, 6 Jul 2025 11:50:40 +0000 Subject: [PATCH 4/6] fix(glob): do not filter out outside workspace --- .../glob/__tests__/list-files.integration.spec.ts | 7 ++++--- src/services/glob/list-files.ts | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/services/glob/__tests__/list-files.integration.spec.ts b/src/services/glob/__tests__/list-files.integration.spec.ts index db52fea6a3..2d4d9040fc 100644 --- a/src/services/glob/__tests__/list-files.integration.spec.ts +++ b/src/services/glob/__tests__/list-files.integration.spec.ts @@ -5,6 +5,7 @@ import * as fs from "fs" import simpleGit from "simple-git" import { listFiles } from "../list-files" import { getWorkspacePath } from "../../../utils/path" +import { randomInt } from "crypto" vi.mock("../../../utils/path", () => ({ getWorkspacePath: vi.fn(), @@ -14,7 +15,7 @@ vi.mock("../../../utils/path", () => ({ describe("listFiles integration tests", () => { let testRepoPath: string - beforeAll(async () => { + beforeEach(async () => { // Create a temporary directory for the Git repo testRepoPath = path.join(os.tmpdir(), `test-repo-${Date.now()}`) await fs.promises.mkdir(testRepoPath, { recursive: true }) @@ -44,7 +45,7 @@ describe("listFiles integration tests", () => { await fs.promises.writeFile(path.join(testRepoPath, "ignored_in_root"), "content") }) - afterAll(async () => { + afterEach(async () => { // Cleanup the temporary directory await fs.promises.rm(testRepoPath, { recursive: true, force: true }) }) @@ -106,7 +107,7 @@ describe("listFiles in non-Git repository", () => { beforeAll(async () => { // Create a temporary directory that is NOT a Git repo - testRepoPath = path.join(os.tmpdir(), `test-non-git-repo-${Date.now()}`) + testRepoPath = path.join(os.tmpdir(), `test-non-git-repo-${Date.now()}${randomInt(256).toString(16)}`) await fs.promises.mkdir(testRepoPath, { recursive: true }) // Create files and directories diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index fc06adf462..e1241bdd0d 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -46,7 +46,9 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb } // Filter out any empty strings and apply the custom ignore list as a final pass. - const finalPaths = allAbsolutePaths.filter((p) => p && !isPathInIgnoredDirectory(p)).map(normalizePath) + const finalPaths = allAbsolutePaths + .filter((p) => p && !isPathInIgnoredDirectory(path.relative(workspacePath, p))) + .map(normalizePath) const trimmed = finalPaths.slice(0, limit) return [trimmed, finalPaths.length > limit] @@ -129,7 +131,7 @@ async function listAllFilesRecursivelyWithFs( const currentDir = queue.shift()! // Pre-check if the directory itself is ignored by custom ignore logic - if (isPathInIgnoredDirectory(currentDir)) { + if (isPathInIgnoredDirectory(path.relative(workspacePath, currentDir))) { continue } @@ -225,7 +227,7 @@ async function listNonRecursive(dir: string, _git: SimpleGit, workspacePath: str for (const entry of entries) { const fullPath = path.join(dir, entry.name) const relPath = path.relative(workspacePath, fullPath).replace(/\\/g, "/") - if (!ig.ignores(relPath) && !isPathInIgnoredDirectory(fullPath)) { + if (!ig.ignores(relPath) && !isPathInIgnoredDirectory(relPath)) { result.push(fullPath) } } From 5eb29ef7270fbf693679cc3cec7af67059530d58 Mon Sep 17 00:00:00 2001 From: X9VoiD Date: Mon, 7 Jul 2025 00:29:29 +0800 Subject: [PATCH 5/6] feat(glob): bring back ripgrep as fallback --- ...spec.ts => list-files.integration.test.ts} | 0 .../glob/__tests__/ripgrep-utils.test.ts | 129 ++++++++++++++ src/services/glob/list-files.ts | 121 +++++++------ src/services/glob/ripgrep-utils.ts | 159 ++++++++++++++++++ 4 files changed, 362 insertions(+), 47 deletions(-) rename src/services/glob/__tests__/{list-files.integration.spec.ts => list-files.integration.test.ts} (100%) create mode 100644 src/services/glob/__tests__/ripgrep-utils.test.ts create mode 100644 src/services/glob/ripgrep-utils.ts diff --git a/src/services/glob/__tests__/list-files.integration.spec.ts b/src/services/glob/__tests__/list-files.integration.test.ts similarity index 100% rename from src/services/glob/__tests__/list-files.integration.spec.ts rename to src/services/glob/__tests__/list-files.integration.test.ts diff --git a/src/services/glob/__tests__/ripgrep-utils.test.ts b/src/services/glob/__tests__/ripgrep-utils.test.ts new file mode 100644 index 0000000000..4de85c64b4 --- /dev/null +++ b/src/services/glob/__tests__/ripgrep-utils.test.ts @@ -0,0 +1,129 @@ +import { vi, describe, it, expect, beforeEach } from "vitest" +import * as path from "path" + +// Mock ripgrep to avoid filesystem dependencies +vi.mock("../../ripgrep", () => ({ + getBinPath: vi.fn().mockResolvedValue("/mock/path/to/rg"), +})) + +// Mock vscode +vi.mock("vscode", () => ({ + env: { + appRoot: "/mock/app/root", + }, +})) + +// Mock filesystem operations +vi.mock("fs", () => ({ + promises: { + access: vi.fn().mockRejectedValue(new Error("Not found")), + readFile: vi.fn().mockResolvedValue(""), + readdir: vi.fn().mockResolvedValue([]), + }, + existsSync: vi.fn().mockImplementation((path) => { + return Promise.resolve(true) + }), + readFileSync: vi.fn().mockImplementation((path) => { + return Promise.resolve("mock content") + }), +})) + +vi.mock("child_process", () => ({ + spawn: vi.fn(), +})) + +vi.mock("../../path", () => ({ + arePathsEqual: vi.fn().mockReturnValue(false), +})) + +import { listFiles } from "../list-files" +import * as childProcess from "child_process" + +describe("list-files symlink support", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it("should include --follow flag in ripgrep arguments", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + // Simulate some output to complete the process + setTimeout(() => callback("test-file.txt\n"), 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + if (event === "error") { + // No error simulation + } + }), + kill: vi.fn(), + } + + mockSpawn.mockReturnValue(mockProcess as any) + + // Call listFiles to trigger ripgrep execution + await listFiles("/test/dir", false, 100) + + // Verify that spawn was called with --follow flag (the critical fix) + const [rgPath, args] = mockSpawn.mock.calls[0] + expect(rgPath).toBe("/mock/path/to/rg") + expect(args).toContain("--files") + expect(args).toContain("--hidden") + expect(args).toContain("--follow") // This is the critical assertion - the fix should add this flag + + // Platform-agnostic path check - verify the last argument is the resolved path + const expectedPath = path.resolve("/test/dir") + expect(args[args.length - 1]).toBe(expectedPath) + }) + + it("should include --follow flag for recursive listings too", async () => { + const mockSpawn = vi.mocked(childProcess.spawn) + const mockProcess = { + stdout: { + on: vi.fn((event, callback) => { + if (event === "data") { + setTimeout(() => callback("test-file.txt\n"), 10) + } + }), + }, + stderr: { + on: vi.fn(), + }, + on: vi.fn((event, callback) => { + if (event === "close") { + setTimeout(() => callback(0), 20) + } + if (event === "error") { + // No error simulation + } + }), + kill: vi.fn(), + } + + mockSpawn.mockReturnValue(mockProcess as any) + + // Call listFiles with recursive=true + await listFiles("/test/dir", true, 100) + + // Verify that spawn was called with --follow flag (the critical fix) + const [rgPath, args] = mockSpawn.mock.calls[0] + expect(rgPath).toBe("/mock/path/to/rg") + expect(args).toContain("--files") + expect(args).toContain("--hidden") + expect(args).toContain("--follow") // This should be present in recursive mode too + + // Platform-agnostic path check - verify the last argument is the resolved path + const expectedPath = path.resolve("/test/dir") + expect(args[args.length - 1]).toBe(expectedPath) + }) +}) diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index e1241bdd0d..4242d3716e 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -6,6 +6,7 @@ import { globSync } from "glob" import ignore from "ignore" import { arePathsEqual, getWorkspacePath } from "../../utils/path" import { isPathInIgnoredDirectory } from "./ignore-utils" +import { listFilesWithRipgrep } from "./ripgrep-utils" function normalizePath(p: string): string { return p.replace(/\\/g, "/") @@ -106,63 +107,60 @@ async function listAllFilesRecursively( } catch (error) { // Fallback to the manual method if git ls-files fails // (e.g., not a git repo, or an error with the command). - console.warn("`git ls-files` failed, falling back to manual file search:", error) - return listAllFilesRecursivelyWithFs(dir, git, workspacePath, limit) + console.warn("`git ls-files` failed, falling back to ripgrep:", error) + return listAllFilesRecursivelyWithRipGrep(dir, git, workspacePath, limit) } } /** - * A fallback recursive file lister that manually walks the filesystem. - * This is slower but works if Git is not available or fails. + * A fallback recursive file lister that uses ripgrep for performance. + * This is used if Git is not available or fails. */ -async function listAllFilesRecursivelyWithFs( +async function listAllFilesRecursivelyWithRipGrep( dir: string, _git: SimpleGit, workspacePath: string, limit: number, ): Promise { - const result: string[] = [] - const queue: string[] = [dir] - - // Create ignore instance using all .gitignore files in the workspace const ig = createGitignoreFilter(workspacePath) - while (queue.length > 0 && result.length < limit) { - const currentDir = queue.shift()! - - // Pre-check if the directory itself is ignored by custom ignore logic - if (isPathInIgnoredDirectory(path.relative(workspacePath, currentDir))) { - continue - } - - let entries: fs.Dirent[] - try { - entries = await fs.promises.readdir(currentDir, { withFileTypes: true }) - } catch (err) { - continue // Skip unreadable directories - } + let files: string[] = [] + let usedRipgrep = false + try { + files = await listFilesWithRipgrep(dir, true, limit) + usedRipgrep = true + } catch (err) { + console.warn("listFilesWithRipgrep failed:", err) + } - for (const entry of entries) { - const fullPath = path.join(currentDir, entry.name) - const relPath = path.relative(workspacePath, fullPath).replace(/\\/g, "/") - if (ig.ignores(relPath)) { - if (entry.isDirectory()) { - // Still need to recurse into subdirectories to find non-ignored children - continue - } + // If ripgrep failed or returned no files, fallback to manual method + if (!files || files.length === 0) { + const queue: string[] = [dir] + while (queue.length > 0) { + const currentDir = queue.shift()! + if (isPathInIgnoredDirectory(path.relative(workspacePath, currentDir))) { continue } - if (entry.isDirectory()) { - queue.push(fullPath) - } else { - result.push(fullPath) - if (result.length >= limit) { - break + let entries: fs.Dirent[] + try { + entries = await fs.promises.readdir(currentDir, { withFileTypes: true }) + } catch (err) { + continue + } + for (const entry of entries) { + const fullPath = path.join(currentDir, entry.name) + if (entry.isDirectory()) { + queue.push(fullPath) + } else { + files.push(fullPath) } } } } - return result + + // Map to relative paths, filter, then map back to absolute + const filtered = ig.filter(files.map((f) => path.relative(workspacePath, path.resolve(f)).replace(/\\/g, "/"))) + return filtered.slice(0, limit).map((rel) => path.join(workspacePath, rel)) } /** @@ -218,18 +216,47 @@ function createGitignoreFilter(rootDir: string) { /** * List only top-level files and directories, filtering out ignored ones. + * Uses ripgrep for performance and consistency with recursive listing. */ async function listNonRecursive(dir: string, _git: SimpleGit, workspacePath: string): Promise { - const entries = await fs.promises.readdir(dir, { withFileTypes: true }) const ig = createGitignoreFilter(workspacePath) - const result: string[] = [] - for (const entry of entries) { - const fullPath = path.join(dir, entry.name) - const relPath = path.relative(workspacePath, fullPath).replace(/\\/g, "/") - if (!ig.ignores(relPath) && !isPathInIgnoredDirectory(relPath)) { - result.push(fullPath) - } + let files: string[] = [] + try { + files = await listFilesWithRipgrep(dir, false, 10000) + } catch (err) { + console.warn("listFilesWithRipgrep (non-recursive) failed:", err) + } + + // If ripgrep failed or returned no files, fallback to manual method + if (!files || files.length === 0) { + const entries = await fs.promises.readdir(dir, { withFileTypes: true }) + return entries + .map((entry) => path.join(dir, entry.name)) + .filter((fullPath) => { + const relPath = path.relative(workspacePath, fullPath).replace(/\\/g, "/") + return !ig.ignores(relPath) && !isPathInIgnoredDirectory(relPath) + }) } - return result + + const filtered = ig.filter(files.map((f) => path.relative(workspacePath, path.resolve(f)).replace(/\\/g, "/"))) + + // Ripgrep --files only lists files, not directories. Add top-level directories manually. + let dirEntries: string[] = [] + try { + const entries = await fs.promises.readdir(dir, { withFileTypes: true }) + dirEntries = entries + .filter((entry) => entry.isDirectory()) + .map((entry) => path.join(dir, entry.name)) + .filter((fullPath) => { + const relPath = path.relative(workspacePath, fullPath).replace(/\\/g, "/") + return !ig.ignores(relPath) && !isPathInIgnoredDirectory(relPath) + }) + } catch (err) { + // ignore + } + + // Enforce limit after combining files and directories + const combined = [...filtered.map((rel) => path.join(workspacePath, rel)), ...dirEntries] + return combined.slice(0, 10000) // 10000 is the same as above, will be sliced by caller } diff --git a/src/services/glob/ripgrep-utils.ts b/src/services/glob/ripgrep-utils.ts new file mode 100644 index 0000000000..6865948dbe --- /dev/null +++ b/src/services/glob/ripgrep-utils.ts @@ -0,0 +1,159 @@ +import * as childProcess from "child_process" +import * as vscode from "vscode" +import * as path from "path" +import { getBinPath } from "../../services/ripgrep" +import { DIRS_TO_IGNORE } from "./constants" + +/** + * List files using ripgrep with appropriate arguments + */ +export async function listFilesWithRipgrep(dirPath: string, recursive: boolean, limit: number): Promise { + const vscodeAppRoot = vscode.env.appRoot + const rgPath = await getBinPath(vscodeAppRoot) + + if (!rgPath) { + throw new Error("Could not find ripgrep binary") + } + + const absolutePath = path.resolve(dirPath) + const rgArgs = buildRipgrepArgs(absolutePath, recursive) + return execRipgrep(rgPath, rgArgs, limit) +} + +/** + * Build appropriate ripgrep arguments based on whether we're doing a recursive search + */ +function buildRipgrepArgs(dirPath: string, recursive: boolean): string[] { + // Base arguments to list files + const args = ["--files", "--hidden", "--follow"] + + if (recursive) { + return [...args, ...buildRecursiveArgs(), dirPath] + } else { + return [...args, ...buildNonRecursiveArgs(), dirPath] + } +} + +/** + * Build ripgrep arguments for recursive directory traversal + */ +function buildRecursiveArgs(): string[] { + const args: string[] = [] + + // In recursive mode, respect .gitignore by default + // (ripgrep does this automatically) + + // Apply directory exclusions for recursive searches + for (const dir of DIRS_TO_IGNORE) { + args.push("-g", `!**/${dir}/**`) + } + + return args +} + +/** + * Build ripgrep arguments for non-recursive directory listing + */ +function buildNonRecursiveArgs(): string[] { + const args: string[] = [] + + // For non-recursive, limit to the current directory level + args.push("-g", "*") + args.push("--maxdepth", "1") // ripgrep uses maxdepth, not max-depth + + // Don't respect .gitignore in non-recursive mode (consistent with original behavior) + args.push("--no-ignore-vcs") + + // Apply directory exclusions for non-recursive searches + for (const dir of DIRS_TO_IGNORE) { + if (dir === ".*") { + // For hidden files/dirs in non-recursive mode + args.push("-g", "!.*") + } else { + // Direct children only + args.push("-g", `!${dir}`) + args.push("-g", `!${dir}/**`) + } + } + + return args +} + +/** + * Execute ripgrep command and return list of files + */ +async function execRipgrep(rgPath: string, args: string[], limit: number): Promise { + return new Promise((resolve, reject) => { + const rgProcess = childProcess.spawn(rgPath, args) + let output = "" + let results: string[] = [] + + // Set timeout to avoid hanging + const timeoutId = setTimeout(() => { + rgProcess.kill() + console.warn("ripgrep timed out, returning partial results") + resolve(results.slice(0, limit)) + }, 10_000) + + // Process stdout data as it comes in + rgProcess.stdout.on("data", (data) => { + output += data.toString() + processRipgrepOutput() + + // Kill the process if we've reached the limit + if (results.length >= limit) { + rgProcess.kill() + clearTimeout(timeoutId) // Clear the timeout when we kill the process due to reaching the limit + } + }) + + // Process stderr but don't fail on non-zero exit codes + rgProcess.stderr.on("data", (data) => { + console.error(`ripgrep stderr: ${data}`) + }) + + // Handle process completion + rgProcess.on("close", (code) => { + // Clear the timeout to avoid memory leaks + clearTimeout(timeoutId) + + // Process any remaining output + processRipgrepOutput(true) + + // Log non-zero exit codes but don't fail + if (code !== 0 && code !== null && code !== 143 /* SIGTERM */) { + console.warn(`ripgrep process exited with code ${code}, returning partial results`) + } + + resolve(results.slice(0, limit)) + }) + + // Handle process errors + rgProcess.on("error", (error) => { + // Clear the timeout to avoid memory leaks + clearTimeout(timeoutId) + reject(new Error(`ripgrep process error: ${error.message}`)) + }) + + // Helper function to process output buffer + function processRipgrepOutput(isFinal = false) { + const lines = output.split("\n") + + // Keep the last incomplete line unless this is the final processing + if (!isFinal) { + output = lines.pop() || "" + } else { + output = "" + } + + // Process each complete line + for (const line of lines) { + if (line.trim() && results.length < limit) { + results.push(line) + } else if (results.length >= limit) { + break + } + } + } + }) +} From 65ddebcb5f99fe0e6c4f53762b80fb9b22dcea97 Mon Sep 17 00:00:00 2001 From: X9VoiD Date: Mon, 7 Jul 2025 00:39:03 +0800 Subject: [PATCH 6/6] fix(glob): pass limit to listNonRecursive --- src/services/glob/list-files.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index 4242d3716e..50318f845d 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -43,7 +43,7 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb if (recursive) { allAbsolutePaths = await listAllFilesRecursively(absoluteDirPath, git, workspacePath, limit + 1) } else { - allAbsolutePaths = await listNonRecursive(absoluteDirPath, git, workspacePath) + allAbsolutePaths = await listNonRecursive(absoluteDirPath, git, workspacePath, limit + 1) } // Filter out any empty strings and apply the custom ignore list as a final pass. @@ -218,12 +218,12 @@ function createGitignoreFilter(rootDir: string) { * List only top-level files and directories, filtering out ignored ones. * Uses ripgrep for performance and consistency with recursive listing. */ -async function listNonRecursive(dir: string, _git: SimpleGit, workspacePath: string): Promise { +async function listNonRecursive(dir: string, _git: SimpleGit, workspacePath: string, limit: number): Promise { const ig = createGitignoreFilter(workspacePath) let files: string[] = [] try { - files = await listFilesWithRipgrep(dir, false, 10000) + files = await listFilesWithRipgrep(dir, false, limit) } catch (err) { console.warn("listFilesWithRipgrep (non-recursive) failed:", err) } @@ -257,6 +257,6 @@ async function listNonRecursive(dir: string, _git: SimpleGit, workspacePath: str } // Enforce limit after combining files and directories - const combined = [...filtered.map((rel) => path.join(workspacePath, rel)), ...dirEntries] - return combined.slice(0, 10000) // 10000 is the same as above, will be sliced by caller + const combined = [...dirEntries, ...filtered.map((rel) => path.join(workspacePath, rel))] + return combined.slice(0, limit) }