Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 45 additions & 5 deletions src/services/checkpoints/ShadowCheckpointService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { fileExistsAtPath } from "../../utils/fs"
import { executeRipgrep } from "../../services/search/file-search"

import { CheckpointDiff, CheckpointResult, CheckpointEventMap } from "./types"
import { getExcludePatterns } from "./excludes"
import { getExcludePatterns, getExcludePatternsWithStats } from "./excludes"

export abstract class ShadowCheckpointService extends EventEmitter {
public readonly taskId: string
Expand Down Expand Up @@ -95,7 +95,12 @@ export abstract class ShadowCheckpointService extends EventEmitter {
)
}

await this.writeExcludeFile()
// Only regenerate exclude file if it doesn't exist
const excludePath = path.join(this.dotGitDir, "info", "exclude")
if (!(await fileExistsAtPath(excludePath))) {
this.log(`[${this.constructor.name}#initShadowGit] exclude file missing, regenerating`)
await this.writeExcludeFile()
}
this.baseHash = await git.revparse(["HEAD"])
} else {
this.log(`[${this.constructor.name}#initShadowGit] creating shadow git repo at ${this.checkpointsDir}`)
Expand All @@ -104,7 +109,8 @@ export abstract class ShadowCheckpointService extends EventEmitter {
await git.addConfig("commit.gpgSign", "false") // Disable commit signing for shadow repo.
await git.addConfig("user.name", "Roo Code")
await git.addConfig("user.email", "[email protected]")
await this.writeExcludeFile()
// Force write exclude file on initial creation (git creates a default one)
await this.writeExcludeFile(true)
await this.stageAll(git)
const { commit } = await git.commit("initial commit", { "--allow-empty": null })
this.baseHash = commit
Expand Down Expand Up @@ -137,10 +143,44 @@ export abstract class ShadowCheckpointService extends EventEmitter {
// .git/info/exclude is local to the shadow git repo, so it's not
// shared with the main repo - and won't conflict with user's
// .gitignore.
protected async writeExcludeFile() {
// Note: This is only called on initial creation or when the exclude file is missing
// to avoid expensive scans on every initialization.
protected async writeExcludeFile(forceRefresh: boolean = false) {
// Skip if exclude file exists and not forcing refresh
if (!forceRefresh) {
const excludePath = path.join(this.dotGitDir, "info", "exclude")
if (await fileExistsAtPath(excludePath)) {
this.log(`[${this.constructor.name}#writeExcludeFile] exclude file exists, skipping regeneration`)
return
}
}

await fs.mkdir(path.join(this.dotGitDir, "info"), { recursive: true })
const patterns = await getExcludePatterns(this.workspaceDir)
const { patterns, stats } = await getExcludePatternsWithStats(this.workspaceDir)
await fs.writeFile(path.join(this.dotGitDir, "info", "exclude"), patterns.join("\n"))

const mb = Math.round(stats.thresholdBytes / (1024 * 1024))

if (stats?.largeFilesExcluded && stats.largeFilesExcluded > 0) {
Copy link

Copilot AI Aug 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The condition can be simplified to if (stats.largeFilesExcluded > 0) since the optional chaining already handles the undefined case and numbers greater than 0 are truthy.

Suggested change
if (stats?.largeFilesExcluded && stats.largeFilesExcluded > 0) {
if (stats.largeFilesExcluded > 0) {

Copilot uses AI. Check for mistakes.
this.log(
`[${this.constructor.name}#writeExcludeFile] auto-excluding ${stats.largeFilesExcluded} large files (>= ${mb}MB) from checkpoints. Sample: ${stats.sample.join(", ")}`,
)
}

if (stats?.errorCounts && (stats.errorCounts.ripgrepErrors > 0 || stats.errorCounts.fsStatErrors > 0)) {
this.log(
`[${this.constructor.name}#writeExcludeFile] auto-exclude encountered errors (ripgrepErrors=${stats.errorCounts.ripgrepErrors}, fsStatErrors=${stats.errorCounts.fsStatErrors}). Check environment and filesystem permissions.`,
)
}
}

	// Public method to allow manual refresh of exclude patterns if needed
	// (e.g. after the workspace's .gitignore/.gitattributes or its set of
	// large files has changed since the shadow repo was initialized).
	// Delegates to writeExcludeFile(true), which bypasses the
	// "exclude file already exists" short-circuit.
	// Throws if the shadow git repo has not been initialized yet.
	public async refreshExcludePatterns() {
		if (!this.git) {
			throw new Error("Shadow git repo not initialized")
		}
		this.log(`[${this.constructor.name}#refreshExcludePatterns] manually refreshing exclude patterns`)
		await this.writeExcludeFile(true)
	}

private async stageAll(git: SimpleGit) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,11 @@ describe.each([[RepoPerTaskCheckpointService, "RepoPerTaskCheckpointService"]])(
})

it("does not create a checkpoint for ignored files", async () => {
// Verify that the exclude file was created during initialization
const excludesPath = path.join(service.checkpointsDir, ".git", "info", "exclude")
const excludeContent = await fs.readFile(excludesPath, "utf-8")
expect(excludeContent).toContain("*.log")

// Create a file that matches an ignored pattern (e.g., .log file).
const ignoredFile = path.join(service.workspaceDir, "ignored.log")
await fs.writeFile(ignoredFile, "Initial ignored content")
Expand All @@ -315,10 +320,12 @@ describe.each([[RepoPerTaskCheckpointService, "RepoPerTaskCheckpointService"]])(
const gitattributesPath = path.join(service.workspaceDir, ".gitattributes")
await fs.writeFile(gitattributesPath, "*.lfs filter=lfs diff=lfs merge=lfs -text")

// Delete the exclude file to force regeneration with new LFS patterns
const excludesPath = path.join(service.checkpointsDir, ".git", "info", "exclude")
await fs.unlink(excludesPath).catch(() => {}) // Ignore error if file doesn't exist

// Re-initialize the service to trigger a write to .git/info/exclude.
service = new klass(service.taskId, service.checkpointsDir, service.workspaceDir, () => {})
const excludesPath = path.join(service.checkpointsDir, ".git", "info", "exclude")
expect((await fs.readFile(excludesPath, "utf-8")).split("\n")).not.toContain("*.lfs")
await service.initShadowGit()
expect((await fs.readFile(excludesPath, "utf-8")).split("\n")).toContain("*.lfs")

Expand Down
198 changes: 197 additions & 1 deletion src/services/checkpoints/__tests__/excludes.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import { join } from "path"
import fs from "fs/promises"
import { fileExistsAtPath } from "../../../utils/fs"
import { getExcludePatterns } from "../excludes"
import { getExcludePatterns, getExcludePatternsWithStats } from "../excludes"
import { executeRipgrep } from "../../search/file-search"

// Mock fs/promises
vi.mock("fs/promises", () => ({
default: {
readFile: vi.fn(),
stat: vi.fn(),
},
}))

Expand All @@ -17,6 +19,12 @@ vi.mock("../../../utils/fs", () => ({
fileExistsAtPath: vi.fn(),
}))

// Mock executeRipgrep
vi.mock("../../search/file-search", () => ({
executeRipgrep: vi.fn(),
executeRipgrepForFiles: vi.fn(),
}))

describe("getExcludePatterns", () => {
const testWorkspacePath = "/test/workspace"

Expand Down Expand Up @@ -151,5 +159,193 @@ readme.md text
expect(excludePatterns).toContain("*.shp") // geospatial
expect(excludePatterns).toContain("*.log") // log
})
it("should include Windows Thumbs.db cache pattern", async () => {
// Mock .gitattributes file doesn't exist
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Get exclude patterns
const excludePatterns = await getExcludePatterns(testWorkspacePath)

// Verify Windows cache file pattern is included
expect(excludePatterns).toContain("Thumbs.db")
})
})

describe("getLargeFileAutoExcludePatterns with LFS pre-filtering", () => {
it("should pre-filter git-lfs patterns when scanning for large files", async () => {
// Mock .gitattributes file exists with LFS patterns
vi.mocked(fileExistsAtPath).mockResolvedValue(true)
const gitAttributesContent = `*.psd filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.mp4 filter=lfs diff=lfs merge=lfs -text
`
vi.mocked(fs.readFile).mockResolvedValue(gitAttributesContent)

// Mock executeRipgrep to return some files
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "file1.txt", type: "file", label: "file1.txt" },
{ path: "large.bin", type: "file", label: "large.bin" },
{ path: "code.js", type: "file", label: "code.js" },
])

// Mock file stats
vi.mocked(fs.stat).mockImplementation(async (path) => {
const pathStr = path.toString()
if (pathStr.includes("large.bin")) {
return { size: 20 * 1024 * 1024 } as any // 20MB
}
return { size: 1024 } as any // 1KB
})

// Get exclude patterns with stats
const result = await getExcludePatternsWithStats(testWorkspacePath)

// Verify executeRipgrep was called with LFS patterns as exclusions
expect(executeRipgrep).toHaveBeenCalledWith(
expect.objectContaining({
args: expect.arrayContaining(["-g", "!*.psd", "-g", "!*.zip", "-g", "!*.mp4"]),
workspacePath: testWorkspacePath,
}),
)

// Verify large.bin was detected and included
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
})

it("should handle empty LFS patterns gracefully", async () => {
// Mock no .gitattributes file
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Mock executeRipgrep to return some files
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "file1.txt", type: "file", label: "file1.txt" },
{ path: "large.bin", type: "file", label: "large.bin" },
])

// Mock file stats
vi.mocked(fs.stat).mockImplementation(async (path) => {
const pathStr = path.toString()
if (pathStr.includes("large.bin")) {
return { size: 20 * 1024 * 1024 } as any // 20MB
}
return { size: 1024 } as any // 1KB
})

// Get exclude patterns with stats
const result = await getExcludePatternsWithStats(testWorkspacePath)

// Verify executeRipgrep was called without LFS patterns
expect(executeRipgrep).toHaveBeenCalledWith(
expect.objectContaining({
args: expect.not.arrayContaining(["-g", "!*.psd", "-g", "!*.zip", "-g", "!*.mp4"]),
workspacePath: testWorkspacePath,
}),
)

// Verify large file was still detected
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
})

it("should not exclude code files even if they are large", async () => {
// Mock no .gitattributes file
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Mock executeRipgrep to return some files including large code files
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "huge.js", type: "file", label: "huge.js" },
{ path: "large.bin", type: "file", label: "large.bin" },
{ path: "big.ts", type: "file", label: "big.ts" },
])

// Mock file stats - all files are large
vi.mocked(fs.stat).mockImplementation(async () => {
return { size: 20 * 1024 * 1024 } as any // 20MB
})

// Get exclude patterns with stats
const result = await getExcludePatternsWithStats(testWorkspacePath)

// Verify only non-code file was excluded
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
expect(result.stats.sample).not.toContain("huge.js")
expect(result.stats.sample).not.toContain("big.ts")
})
})

describe("configurable threshold and error reporting", () => {
it("respects ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB override", async () => {
// Ensure no LFS patterns
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Set threshold to 1 MB
const prev = process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB = "1"

try {
// Mock file listing
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "large.bin", type: "file", label: "large.bin" },
{ path: "code.js", type: "file", label: "code.js" },
])

// Mock sizes: 2MB for large.bin, 2MB for code.js (but code is allowlisted)
vi.mocked(fs.stat).mockImplementation(async (p) => {
const s = p.toString()
if (s.includes("large.bin") || s.includes("code.js")) {
return { size: 2 * 1024 * 1024 } as any
}
return { size: 1024 } as any
})

const result = await getExcludePatternsWithStats(testWorkspacePath)

expect(result.stats.thresholdBytes).toBe(1 * 1024 * 1024)
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
// code.js should never be excluded even if large
expect(result.stats.sample).not.toContain("code.js")
} finally {
// cleanup
if (prev === undefined) {
delete process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
} else {
process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB = prev
}
}
})

it("records ripgrep failures without breaking pattern generation", async () => {
vi.mocked(fileExistsAtPath).mockResolvedValue(false)
// Force executeRipgrep to throw
vi.mocked(executeRipgrep).mockRejectedValue(new Error("ripgrep failed"))

const result = await getExcludePatternsWithStats(testWorkspacePath)

// No dynamic large files because ripgrep failed
expect(result.stats.largeFilesExcluded).toBe(0)
expect(result.stats.sample.length).toBe(0)
// Error counts should reflect one ripgrep error
expect(result.stats.errorCounts?.ripgrepErrors).toBe(1)
expect(result.stats.errorCounts?.fsStatErrors).toBe(0)
// Base patterns should still include .git/
expect(result.patterns).toContain(".git/")
})

it("counts fs.stat errors for diagnostics", async () => {
vi.mocked(fileExistsAtPath).mockResolvedValue(false)
vi.mocked(executeRipgrep).mockResolvedValue([{ path: "mystery.bin", type: "file", label: "mystery.bin" }])
// Make stat fail
vi.mocked(fs.stat).mockRejectedValue(new Error("stat failure"))

const result = await getExcludePatternsWithStats(testWorkspacePath)

expect(result.stats.largeFilesExcluded).toBe(0)
expect(result.stats.sample.length).toBe(0)
expect(result.stats.errorCounts?.ripgrepErrors).toBe(0)
expect(result.stats.errorCounts?.fsStatErrors).toBe(1)
})
})
})
Loading
Loading