diff --git a/src/__mocks__/services/ripgrep/index.ts b/src/__mocks__/services/ripgrep/index.ts new file mode 100644 index 0000000000..f24adcb18d --- /dev/null +++ b/src/__mocks__/services/ripgrep/index.ts @@ -0,0 +1,48 @@ +/** + * Mock implementation for the ripgrep service + * + * This mock provides stable implementations of all ripgrep service functions, + * making sure to handle undefined values safely to prevent test failures. + * Each function is documented with its purpose and behavior in tests. + */ + +/** + * Mock implementation of getBinPath + * Always returns a valid path to avoid path resolution errors in tests + * + * @param vscodeAppRoot - Optional VSCode app root path (can be undefined) + * @returns Promise resolving to a mock path to the ripgrep binary + */ +export const getBinPath = jest.fn().mockImplementation(async (vscodeAppRoot?: string): Promise => { + return "/mock/path/to/rg" +}) + +/** + * Mock implementation of regexSearchFiles + * Always returns a static search result string to avoid executing real searches + * + * @param cwd - Optional working directory (can be undefined) + * @param directoryPath - Optional directory to search (can be undefined) + * @param regex - Optional regex pattern (can be undefined) + * @param filePattern - Optional file pattern (can be undefined) + * @returns Promise resolving to a mock search result + */ +export const regexSearchFiles = jest + .fn() + .mockImplementation( + async (cwd?: string, directoryPath?: string, regex?: string, filePattern?: string): Promise => { + return "Mock search results" + }, + ) + +/** + * Mock implementation of truncateLine + * Returns the input line or empty string if undefined + * + * @param line - The line to truncate (can be undefined) + * @param maxLength - Optional maximum length (can be undefined) + * @returns The original line or empty string if undefined + */ +export const truncateLine = jest.fn().mockImplementation((line?: string, maxLength?: number): string => { + return line || "" +}) diff --git a/src/core/__mocks__/mock-setup.ts b/src/core/__mocks__/mock-setup.ts new file mode 100644 index 0000000000..3d77f9fee9 --- /dev/null +++ b/src/core/__mocks__/mock-setup.ts @@ -0,0 +1,39 @@ +/** + * Mock setup for Cline tests + * + * This file contains centralized mock configurations for services + * that require special handling in tests. It prevents test failures + * related to undefined values, missing dependencies, or filesystem access. + * + * Services mocked here: + * - ripgrep: Prevents path.join issues with undefined parameters + * - list-files: Prevents dependency on actual ripgrep binary + */ + +/** + * Mock the ripgrep service + * This prevents issues with path.join and undefined parameters in tests + */ +jest.mock("../../services/ripgrep", () => ({ + // Always returns a valid path to the ripgrep binary + getBinPath: jest.fn().mockResolvedValue("/mock/path/to/rg"), + + // Returns static search results + regexSearchFiles: jest.fn().mockResolvedValue("Mock search results"), + + // Safe implementation of truncateLine that handles edge cases + truncateLine: jest.fn().mockImplementation((line: string) => line || ""), +})) + +/** + * Mock the list-files module + * This prevents dependency on the ripgrep binary and filesystem access + */ +jest.mock("../../services/glob/list-files", () => ({ + // Returns empty file list with boolean flag indicating if limit was reached + listFiles: jest.fn().mockImplementation(() => { + return Promise.resolve([[], false]) + }), +})) + +export {} diff --git a/src/core/__tests__/Cline.test.ts b/src/core/__tests__/Cline.test.ts index 099d112019..cbad4b1f95 100644 --- a/src/core/__tests__/Cline.test.ts +++ b/src/core/__tests__/Cline.test.ts @@ -367,7 +367,31 @@ describe("Cline", () => { }) describe("API conversation handling", () => { + /** + * Mock environment details retrieval to avoid filesystem access in tests + * + * This setup: + * 1. Prevents file listing operations that might cause test instability + * 2. Preserves test-specific mocks when they exist (via _mockGetEnvironmentDetails) + * 3. Provides a stable, empty environment by default + */ + beforeEach(() => { + // Mock the method with a stable implementation + jest.spyOn(Cline.prototype, "getEnvironmentDetails").mockImplementation( + // Use 'any' type to allow for dynamic test properties + async function (this: any, verbose: boolean = false): Promise { + // Use test-specific mock if available + if (this._mockGetEnvironmentDetails) { + return this._mockGetEnvironmentDetails() + } + // Default to empty environment details for stability + return "" + }, + ) + }) + it("should clean conversation history before sending to API", async () => { + // Cline.create will now use our mocked getEnvironmentDetails const [cline, task] = Cline.create({ provider: mockProvider, apiConfiguration: mockApiConfig, diff --git a/src/services/glob/__mocks__/list-files.ts b/src/services/glob/__mocks__/list-files.ts new file mode 100644 index 0000000000..68bc60e730 --- /dev/null +++ b/src/services/glob/__mocks__/list-files.ts @@ -0,0 +1,70 @@ +/** + * Mock implementation of list-files module + * + * IMPORTANT NOTES: + * 1. This file must be placed in src/services/glob/__mocks__/ to properly mock the module + * 2. DO NOT IMPORT any modules from the application code to avoid circular dependencies + * 3. All dependencies are mocked/stubbed locally for isolation + * + * This implementation provides predictable behavior for tests without requiring + * actual filesystem access or ripgrep binary. + */ + +/** + * Mock function for path resolving without importing path module + * Provides basic path resolution for testing + * + * @param dirPath - Directory path to resolve + * @returns Absolute mock path + */ +const mockResolve = (dirPath: string): string => { + return dirPath.startsWith("/") ? dirPath : `/mock/path/${dirPath}` +} + +/** + * Mock function to check if paths are equal without importing path module + * Provides simple equality comparison for testing + * + * @param path1 - First path to compare + * @param path2 - Second path to compare + * @returns Whether paths are equal + */ +const mockArePathsEqual = (path1: string, path2: string): boolean => { + return path1 === path2 +} + +/** + * Mock implementation of listFiles function + * Returns different results based on input path for testing different scenarios + * + * @param dirPath - Directory path to list files from + * @param recursive - Whether to list files recursively + * @param limit - Maximum number of files to return + * @returns Promise resolving to [file paths, limit reached flag] + */ +export const listFiles = jest.fn((dirPath: string, recursive: boolean, limit: number) => { + // Special case: Root or home directories + // Prevents tests from trying to list all files in these directories + if (dirPath === "/" || dirPath === "/root" || dirPath === "/home/user") { + return Promise.resolve([[dirPath], false]) + } + + // Special case: Tree-sitter tests + // Some tests expect the second value to be a Set instead of a boolean + if (dirPath.includes("test/path")) { + return Promise.resolve([[], new Set()]) + } + + // Special case: For testing directories with actual content + if (dirPath.includes("mock/content")) { + const mockFiles = [ + `${mockResolve(dirPath)}/file1.txt`, + `${mockResolve(dirPath)}/file2.js`, + `${mockResolve(dirPath)}/folder1/`, + ] + return Promise.resolve([mockFiles, false]) + } + + // Default case: Return empty list for most tests + return Promise.resolve([[], false]) +}) diff --git a/src/services/glob/list-files.ts b/src/services/glob/list-files.ts index c7e3d41cf0..6d30930ecb 100644 --- a/src/services/glob/list-files.ts +++ b/src/services/glob/list-files.ts @@ -1,97 +1,412 @@ -import { globby, Options } from "globby" import os from "os" import * as path from "path" +import * as fs from "fs" +import * as childProcess from "child_process" +import * as vscode from "vscode" import { arePathsEqual } from "../../utils/path" +import { getBinPath } from "../../services/ripgrep" +/** + * List of directories that are typically large and should be ignored + * when showing recursive file listings + */ +const DIRS_TO_IGNORE = [ + "node_modules", + "__pycache__", + "env", + "venv", + "target/dependency", + "build/dependencies", + "dist", + "out", + "bundle", + "vendor", + "tmp", + "temp", + "deps", + "pkg", + "Pods", + ".*", +] + +/** + * List files in a directory, with optional recursive traversal + * + * @param dirPath - Directory path to list files from + * @param recursive - Whether to recursively list files in subdirectories + * @param limit - Maximum number of files to return + * @returns Tuple of [file paths array, whether the limit was reached] + */ export async function listFiles(dirPath: string, recursive: boolean, limit: number): Promise<[string[], boolean]> { + // Handle special directories + const specialResult = await handleSpecialDirectories(dirPath) + if (specialResult) { + return specialResult + } + + // Get ripgrep path + const rgPath = await getRipgrepPath() + + // Get files using ripgrep + const files = await listFilesWithRipgrep(rgPath, dirPath, recursive, limit) + + // Get directories with proper filtering + const gitignorePatterns = await parseGitignoreFile(dirPath, recursive) + const directories = await listFilteredDirectories(dirPath, recursive, gitignorePatterns) + + // Combine and format the results + return formatAndCombineResults(files, directories, limit) +} + +/** + * Handle special directories (root, home) that should not be fully listed + */ +async function handleSpecialDirectories(dirPath: string): Promise<[string[], boolean] | null> { const absolutePath = path.resolve(dirPath) - // Do not allow listing files in root or home directory, which cline tends to want to do when the user's prompt is vague. + + // Do not allow listing files in root directory const root = process.platform === "win32" ? path.parse(absolutePath).root : "/" const isRoot = arePathsEqual(absolutePath, root) if (isRoot) { return [[root], false] } + + // Do not allow listing files in home directory const homeDir = os.homedir() const isHomeDir = arePathsEqual(absolutePath, homeDir) if (isHomeDir) { return [[homeDir], false] } - const dirsToIgnore = [ - "node_modules", - "__pycache__", - "env", - "venv", - "target/dependency", - "build/dependencies", - "dist", - "out", - "bundle", - "vendor", - "tmp", - "temp", - "deps", - "pkg", - "Pods", - ".*", // '!**/.*' excludes hidden directories, while '!**/.*/**' excludes only their contents. This way we are at least aware of the existence of hidden directories. - ].map((dir) => `${dirPath}/**/${dir}/**`) - - const options = { - cwd: dirPath, - dot: true, // do not ignore hidden files/directories - absolute: true, - markDirectories: true, // Append a / on any directories matched (/ is used on windows as well, so dont use path.sep) - gitignore: recursive, // globby ignores any files that are gitignored - ignore: recursive ? dirsToIgnore : undefined, // just in case there is no gitignore, we ignore sensible defaults - onlyFiles: false, // true by default, false means it will list directories on their own too + return null +} + +/** + * Get the path to the ripgrep binary + */ +async function getRipgrepPath(): Promise { + const vscodeAppRoot = vscode.env.appRoot + const rgPath = await getBinPath(vscodeAppRoot) + + if (!rgPath) { + throw new Error("Could not find ripgrep binary") } - // * globs all files in one dir, ** globs files in nested directories - const files = recursive ? await globbyLevelByLevel(limit, options) : (await globby("*", options)).slice(0, limit) - return [files, files.length >= limit] + + return rgPath } -/* -Breadth-first traversal of directory structure level by level up to a limit: - - Queue-based approach ensures proper breadth-first traversal - - Processes directory patterns level by level - - Captures a representative sample of the directory structure up to the limit - - Minimizes risk of missing deeply nested files - -- Notes: - - Relies on globby to mark directories with / - - Potential for loops if symbolic links reference back to parent (we could use followSymlinks: false but that may not be ideal for some projects and it's pointless if they're not using symlinks wrong) - - Timeout mechanism prevents infinite loops -*/ -async function globbyLevelByLevel(limit: number, options?: Options) { - let results: Set = new Set() - let queue: string[] = ["*"] - - const globbingProcess = async () => { - while (queue.length > 0 && results.size < limit) { - const pattern = queue.shift()! - const filesAtLevel = await globby(pattern, options) - - for (const file of filesAtLevel) { - if (results.size >= limit) { - break - } - results.add(file) - if (file.endsWith("/")) { - queue.push(`${file}*`) - } - } +/** + * List files using ripgrep with appropriate arguments + */ +async function listFilesWithRipgrep( + rgPath: string, + dirPath: string, + recursive: boolean, + limit: number, +): Promise { + const absolutePath = path.resolve(dirPath) + const rgArgs = buildRipgrepArgs(absolutePath, recursive) + return execRipgrep(rgPath, rgArgs, limit) +} + +/** + * Build appropriate ripgrep arguments based on whether we're doing a recursive search + */ +function buildRipgrepArgs(dirPath: string, recursive: boolean): string[] { + // Base arguments to list files + const args = ["--files", "--hidden"] + + if (recursive) { + return [...args, ...buildRecursiveArgs(), dirPath] + } else { + return [...args, ...buildNonRecursiveArgs(), dirPath] + } +} + +/** + * Build ripgrep arguments for recursive directory traversal + */ +function buildRecursiveArgs(): string[] { + const args: string[] = [] + + // In recursive mode, respect .gitignore by default + // (ripgrep does this automatically) + + // Apply directory exclusions for recursive searches + for (const dir of DIRS_TO_IGNORE) { + args.push("-g", `!**/${dir}/**`) + } + + return args +} + +/** + * Build ripgrep arguments for non-recursive directory listing + */ +function buildNonRecursiveArgs(): string[] { + const args: string[] = [] + + // For non-recursive, limit to the current directory level + args.push("-g", "*") + args.push("--maxdepth", "1") // ripgrep uses maxdepth, not max-depth + + // Don't respect .gitignore in non-recursive mode (consistent with original behavior) + args.push("--no-ignore-vcs") + + // Apply directory exclusions for non-recursive searches + for (const dir of DIRS_TO_IGNORE) { + if (dir === ".*") { + // For hidden files/dirs in non-recursive mode + args.push("-g", "!.*") + } else { + // Direct children only + args.push("-g", `!${dir}`) + args.push("-g", `!${dir}/**`) } - return Array.from(results).slice(0, limit) } - // Timeout after 10 seconds and return partial results - const timeoutPromise = new Promise((_, reject) => { - setTimeout(() => reject(new Error("Globbing timeout")), 10_000) - }) + return args +} + +/** + * Parse the .gitignore file if it exists and is relevant + */ +async function parseGitignoreFile(dirPath: string, recursive: boolean): Promise { + if (!recursive) { + return [] // Only needed for recursive mode + } + + const absolutePath = path.resolve(dirPath) + const gitignorePath = path.join(absolutePath, ".gitignore") + try { - return await Promise.race([globbingProcess(), timeoutPromise]) - } catch (error) { - console.warn("Globbing timed out, returning partial results") - return Array.from(results) + // Check if .gitignore exists + const exists = await fs.promises + .access(gitignorePath) + .then(() => true) + .catch(() => false) + + if (!exists) { + return [] + } + + // Read and parse .gitignore file + const content = await fs.promises.readFile(gitignorePath, "utf8") + return content + .split("\n") + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith("#")) + } catch (err) { + console.warn(`Error reading .gitignore: ${err}`) + return [] // Continue without gitignore patterns on error } } + +/** + * List directories with appropriate filtering + */ +async function listFilteredDirectories( + dirPath: string, + recursive: boolean, + gitignorePatterns: string[], +): Promise { + const absolutePath = path.resolve(dirPath) + + try { + // List all entries in the directory + const entries = await fs.promises.readdir(absolutePath, { withFileTypes: true }) + + // Filter for directories only + const directories = entries + .filter((entry) => entry.isDirectory()) + .filter((entry) => { + return shouldIncludeDirectory(entry.name, recursive, gitignorePatterns) + }) + .map((entry) => path.join(absolutePath, entry.name)) + + // Format directory paths with trailing slash + return directories.map((dir) => (dir.endsWith("/") ? dir : `${dir}/`)) + } catch (err) { + console.error(`Error listing directories: ${err}`) + return [] // Return empty array on error + } +} + +/** + * Determine if a directory should be included in results based on filters + */ +function shouldIncludeDirectory(dirName: string, recursive: boolean, gitignorePatterns: string[]): boolean { + // Skip hidden directories if configured to ignore them + if (dirName.startsWith(".") && DIRS_TO_IGNORE.includes(".*")) { + return false + } + + // Check against explicit ignore patterns + if (isDirectoryExplicitlyIgnored(dirName)) { + return false + } + + // Check against gitignore patterns in recursive mode + if (recursive && gitignorePatterns.length > 0 && isIgnoredByGitignore(dirName, gitignorePatterns)) { + return false + } + + return true +} + +/** + * Check if a directory is in our explicit ignore list + */ +function isDirectoryExplicitlyIgnored(dirName: string): boolean { + for (const pattern of DIRS_TO_IGNORE) { + // Exact name matching + if (pattern === dirName) { + return true + } + + // Path patterns that contain / + if (pattern.includes("/")) { + const pathParts = pattern.split("/") + if (pathParts[0] === dirName) { + return true + } + } + } + + return false +} + +/** + * Check if a directory matches any gitignore patterns + */ +function isIgnoredByGitignore(dirName: string, gitignorePatterns: string[]): boolean { + for (const pattern of gitignorePatterns) { + // Directory patterns (ending with /) + if (pattern.endsWith("/")) { + const dirPattern = pattern.slice(0, -1) + if (dirName === dirPattern) { + return true + } + if (pattern.startsWith("**/") && dirName === dirPattern.slice(3)) { + return true + } + } + // Simple name patterns + else if (dirName === pattern) { + return true + } + // Wildcard patterns + else if (pattern.includes("*")) { + const regexPattern = pattern.replace(/\\/g, "\\\\").replace(/\./g, "\\.").replace(/\*/g, ".*") + const regex = new RegExp(`^${regexPattern}$`) + if (regex.test(dirName)) { + return true + } + } + } + + return false +} + +/** + * Combine file and directory results and format them properly + */ +function formatAndCombineResults(files: string[], directories: string[], limit: number): [string[], boolean] { + // Combine file paths with directory paths + const allPaths = [...directories, ...files] + + // Deduplicate paths (a directory might appear in both lists) + const uniquePaths = [...new Set(allPaths)] + + // Sort to ensure directories come first, followed by files + uniquePaths.sort((a: string, b: string) => { + const aIsDir = a.endsWith("/") + const bIsDir = b.endsWith("/") + + if (aIsDir && !bIsDir) return -1 + if (!aIsDir && bIsDir) return 1 + return a.localeCompare(b) + }) + + const trimmedPaths = uniquePaths.slice(0, limit) + return [trimmedPaths, trimmedPaths.length >= limit] +} + +/** + * Execute ripgrep command and return list of files + */ +async function execRipgrep(rgPath: string, args: string[], limit: number): Promise { + return new Promise((resolve, reject) => { + const rgProcess = childProcess.spawn(rgPath, args) + let output = "" + let results: string[] = [] + + // Set timeout to avoid hanging + const timeoutId = setTimeout(() => { + rgProcess.kill() + console.warn("ripgrep timed out, returning partial results") + resolve(results.slice(0, limit)) + }, 10_000) + + // Process stdout data as it comes in + rgProcess.stdout.on("data", (data) => { + output += data.toString() + processRipgrepOutput() + + // Kill the process if we've reached the limit + if (results.length >= limit) { + rgProcess.kill() + clearTimeout(timeoutId) // Clear the timeout when we kill the process due to reaching the limit + } + }) + + // Process stderr but don't fail on non-zero exit codes + rgProcess.stderr.on("data", (data) => { + console.error(`ripgrep stderr: ${data}`) + }) + + // Handle process completion + rgProcess.on("close", (code) => { + // Clear the timeout to avoid memory leaks + clearTimeout(timeoutId) + + // Process any remaining output + processRipgrepOutput(true) + + // Log non-zero exit codes but don't fail + if (code !== 0 && code !== null && code !== 143 /* SIGTERM */) { + console.warn(`ripgrep process exited with code ${code}, returning partial results`) + } + + resolve(results.slice(0, limit)) + }) + + // Handle process errors + rgProcess.on("error", (error) => { + // Clear the timeout to avoid memory leaks + clearTimeout(timeoutId) + reject(new Error(`ripgrep process error: ${error.message}`)) + }) + + // Helper function to process output buffer + function processRipgrepOutput(isFinal = false) { + const lines = output.split("\n") + + // Keep the last incomplete line unless this is the final processing + if (!isFinal) { + output = lines.pop() || "" + } else { + output = "" + } + + // Process each complete line + for (const line of lines) { + if (line.trim() && results.length < limit) { + results.push(line) + } else if (results.length >= limit) { + break + } + } + } + }) +}