Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 45 additions & 5 deletions src/services/checkpoints/ShadowCheckpointService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { fileExistsAtPath } from "../../utils/fs"
import { executeRipgrep } from "../../services/search/file-search"

import { CheckpointDiff, CheckpointResult, CheckpointEventMap } from "./types"
import { getExcludePatterns } from "./excludes"
import { getExcludePatterns, getExcludePatternsWithStats } from "./excludes"

export abstract class ShadowCheckpointService extends EventEmitter {
public readonly taskId: string
Expand Down Expand Up @@ -95,7 +95,12 @@ export abstract class ShadowCheckpointService extends EventEmitter {
)
}

await this.writeExcludeFile()
// Only regenerate exclude file if it doesn't exist
const excludePath = path.join(this.dotGitDir, "info", "exclude")
if (!(await fileExistsAtPath(excludePath))) {
this.log(`[${this.constructor.name}#initShadowGit] exclude file missing, regenerating`)
await this.writeExcludeFile()
}
this.baseHash = await git.revparse(["HEAD"])
} else {
this.log(`[${this.constructor.name}#initShadowGit] creating shadow git repo at ${this.checkpointsDir}`)
Expand All @@ -104,7 +109,8 @@ export abstract class ShadowCheckpointService extends EventEmitter {
await git.addConfig("commit.gpgSign", "false") // Disable commit signing for shadow repo.
await git.addConfig("user.name", "Roo Code")
await git.addConfig("user.email", "[email protected]")
await this.writeExcludeFile()
// Force write exclude file on initial creation (git creates a default one)
await this.writeExcludeFile(true)
await this.stageAll(git)
const { commit } = await git.commit("initial commit", { "--allow-empty": null })
this.baseHash = commit
Expand Down Expand Up @@ -137,10 +143,44 @@ export abstract class ShadowCheckpointService extends EventEmitter {
// .git/info/exclude is local to the shadow git repo, so it's not
// shared with the main repo - and won't conflict with user's
// .gitignore.
protected async writeExcludeFile() {
// Note: This is only called on initial creation or when the exclude file is missing
// to avoid expensive scans on every initialization.
protected async writeExcludeFile(forceRefresh: boolean = false) {
// Skip if exclude file exists and not forcing refresh
if (!forceRefresh) {
const excludePath = path.join(this.dotGitDir, "info", "exclude")
if (await fileExistsAtPath(excludePath)) {
this.log(`[${this.constructor.name}#writeExcludeFile] exclude file exists, skipping regeneration`)
return
}
}

await fs.mkdir(path.join(this.dotGitDir, "info"), { recursive: true })
const patterns = await getExcludePatterns(this.workspaceDir)
const { patterns, stats } = await getExcludePatternsWithStats(this.workspaceDir)
await fs.writeFile(path.join(this.dotGitDir, "info", "exclude"), patterns.join("\n"))

const mb = Math.round(stats.thresholdBytes / (1024 * 1024))

if (stats?.largeFilesExcluded && stats.largeFilesExcluded > 0) {
Copy link

Copilot AI Aug 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The condition can be simplified to if (stats.largeFilesExcluded > 0) since the optional chaining already handles the undefined case and numbers greater than 0 are truthy.

Suggested change
if (stats?.largeFilesExcluded && stats.largeFilesExcluded > 0) {
if (stats.largeFilesExcluded > 0) {

Copilot uses AI. Check for mistakes.
this.log(
`[${this.constructor.name}#writeExcludeFile] auto-excluding ${stats.largeFilesExcluded} large files (>= ${mb}MB) from checkpoints. Sample: ${stats.sample.join(", ")}`,
)
}

if (stats?.errorCounts && (stats.errorCounts.ripgrepErrors > 0 || stats.errorCounts.fsStatErrors > 0)) {
this.log(
`[${this.constructor.name}#writeExcludeFile] auto-exclude encountered errors (ripgrepErrors=${stats.errorCounts.ripgrepErrors}, fsStatErrors=${stats.errorCounts.fsStatErrors}). Check environment and filesystem permissions.`,
)
}
}

	// Public method to allow manual refresh of exclude patterns if needed
	// (e.g. after the workspace's .gitignore/.gitattributes or its set of
	// large files has changed since the shadow repo was initialized).
	// Delegates to writeExcludeFile(true), which bypasses the
	// "exclude file already exists" short-circuit.
	// Throws if the shadow git repo has not been initialized yet.
	public async refreshExcludePatterns() {
		if (!this.git) {
			throw new Error("Shadow git repo not initialized")
		}
		this.log(`[${this.constructor.name}#refreshExcludePatterns] manually refreshing exclude patterns`)
		await this.writeExcludeFile(true)
	}

private async stageAll(git: SimpleGit) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,11 @@ describe.each([[RepoPerTaskCheckpointService, "RepoPerTaskCheckpointService"]])(
})

it("does not create a checkpoint for ignored files", async () => {
// Verify that the exclude file was created during initialization
const excludesPath = path.join(service.checkpointsDir, ".git", "info", "exclude")
const excludeContent = await fs.readFile(excludesPath, "utf-8")
expect(excludeContent).toContain("*.log")

// Create a file that matches an ignored pattern (e.g., .log file).
const ignoredFile = path.join(service.workspaceDir, "ignored.log")
await fs.writeFile(ignoredFile, "Initial ignored content")
Expand All @@ -315,10 +320,12 @@ describe.each([[RepoPerTaskCheckpointService, "RepoPerTaskCheckpointService"]])(
const gitattributesPath = path.join(service.workspaceDir, ".gitattributes")
await fs.writeFile(gitattributesPath, "*.lfs filter=lfs diff=lfs merge=lfs -text")

// Delete the exclude file to force regeneration with new LFS patterns
const excludesPath = path.join(service.checkpointsDir, ".git", "info", "exclude")
await fs.unlink(excludesPath).catch(() => {}) // Ignore error if file doesn't exist

// Re-initialize the service to trigger a write to .git/info/exclude.
service = new klass(service.taskId, service.checkpointsDir, service.workspaceDir, () => {})
const excludesPath = path.join(service.checkpointsDir, ".git", "info", "exclude")
expect((await fs.readFile(excludesPath, "utf-8")).split("\n")).not.toContain("*.lfs")
await service.initShadowGit()
expect((await fs.readFile(excludesPath, "utf-8")).split("\n")).toContain("*.lfs")

Expand Down
198 changes: 197 additions & 1 deletion src/services/checkpoints/__tests__/excludes.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import { join } from "path"
import fs from "fs/promises"
import { fileExistsAtPath } from "../../../utils/fs"
import { getExcludePatterns } from "../excludes"
import { getExcludePatterns, getExcludePatternsWithStats } from "../excludes"
import { executeRipgrep } from "../../search/file-search"

// Mock fs/promises
vi.mock("fs/promises", () => ({
default: {
readFile: vi.fn(),
stat: vi.fn(),
},
}))

Expand All @@ -17,6 +19,12 @@ vi.mock("../../../utils/fs", () => ({
fileExistsAtPath: vi.fn(),
}))

// Mock executeRipgrep
vi.mock("../../search/file-search", () => ({
executeRipgrep: vi.fn(),
executeRipgrepForFiles: vi.fn(),
}))

describe("getExcludePatterns", () => {
const testWorkspacePath = "/test/workspace"

Expand Down Expand Up @@ -151,5 +159,193 @@ readme.md text
expect(excludePatterns).toContain("*.shp") // geospatial
expect(excludePatterns).toContain("*.log") // log
})
it("should include Windows Thumbs.db cache pattern", async () => {
// Mock .gitattributes file doesn't exist
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Get exclude patterns
const excludePatterns = await getExcludePatterns(testWorkspacePath)

// Verify Windows cache file pattern is included
expect(excludePatterns).toContain("Thumbs.db")
})
})

describe("getLargeFileAutoExcludePatterns with LFS pre-filtering", () => {
it("should pre-filter git-lfs patterns when scanning for large files", async () => {
// Mock .gitattributes file exists with LFS patterns
vi.mocked(fileExistsAtPath).mockResolvedValue(true)
const gitAttributesContent = `*.psd filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.mp4 filter=lfs diff=lfs merge=lfs -text
`
vi.mocked(fs.readFile).mockResolvedValue(gitAttributesContent)

// Mock executeRipgrep to return some files
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "file1.txt", type: "file", label: "file1.txt" },
{ path: "large.bin", type: "file", label: "large.bin" },
{ path: "code.js", type: "file", label: "code.js" },
])

// Mock file stats
vi.mocked(fs.stat).mockImplementation(async (path) => {
const pathStr = path.toString()
if (pathStr.includes("large.bin")) {
return { size: 20 * 1024 * 1024 } as any // 20MB
}
return { size: 1024 } as any // 1KB
})

// Get exclude patterns with stats
const result = await getExcludePatternsWithStats(testWorkspacePath)

// Verify executeRipgrep was called with LFS patterns as exclusions
expect(executeRipgrep).toHaveBeenCalledWith(
expect.objectContaining({
args: expect.arrayContaining(["-g", "!*.psd", "-g", "!*.zip", "-g", "!*.mp4"]),
workspacePath: testWorkspacePath,
}),
)

// Verify large.bin was detected and included
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
})

it("should handle empty LFS patterns gracefully", async () => {
// Mock no .gitattributes file
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Mock executeRipgrep to return some files
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "file1.txt", type: "file", label: "file1.txt" },
{ path: "large.bin", type: "file", label: "large.bin" },
])

// Mock file stats
vi.mocked(fs.stat).mockImplementation(async (path) => {
const pathStr = path.toString()
if (pathStr.includes("large.bin")) {
return { size: 20 * 1024 * 1024 } as any // 20MB
}
return { size: 1024 } as any // 1KB
})

// Get exclude patterns with stats
const result = await getExcludePatternsWithStats(testWorkspacePath)

// Verify executeRipgrep was called without LFS patterns
expect(executeRipgrep).toHaveBeenCalledWith(
expect.objectContaining({
args: expect.not.arrayContaining(["-g", "!*.psd", "-g", "!*.zip", "-g", "!*.mp4"]),
workspacePath: testWorkspacePath,
}),
)

// Verify large file was still detected
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
})

it("should not exclude code files even if they are large", async () => {
// Mock no .gitattributes file
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Mock executeRipgrep to return some files including large code files
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "huge.js", type: "file", label: "huge.js" },
{ path: "large.bin", type: "file", label: "large.bin" },
{ path: "big.ts", type: "file", label: "big.ts" },
])

// Mock file stats - all files are large
vi.mocked(fs.stat).mockImplementation(async () => {
return { size: 20 * 1024 * 1024 } as any // 20MB
})

// Get exclude patterns with stats
const result = await getExcludePatternsWithStats(testWorkspacePath)

// Verify only non-code file was excluded
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
expect(result.stats.sample).not.toContain("huge.js")
expect(result.stats.sample).not.toContain("big.ts")
})
})

describe("configurable threshold and error reporting", () => {
it("respects ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB override", async () => {
// Ensure no LFS patterns
vi.mocked(fileExistsAtPath).mockResolvedValue(false)

// Set threshold to 1 MB
const prev = process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB = "1"

try {
// Mock file listing
vi.mocked(executeRipgrep).mockResolvedValue([
{ path: "large.bin", type: "file", label: "large.bin" },
{ path: "code.js", type: "file", label: "code.js" },
])

// Mock sizes: 2MB for large.bin, 2MB for code.js (but code is allowlisted)
vi.mocked(fs.stat).mockImplementation(async (p) => {
const s = p.toString()
if (s.includes("large.bin") || s.includes("code.js")) {
return { size: 2 * 1024 * 1024 } as any
}
return { size: 1024 } as any
})

const result = await getExcludePatternsWithStats(testWorkspacePath)

expect(result.stats.thresholdBytes).toBe(1 * 1024 * 1024)
expect(result.stats.largeFilesExcluded).toBe(1)
expect(result.stats.sample).toContain("large.bin")
// code.js should never be excluded even if large
expect(result.stats.sample).not.toContain("code.js")
} finally {
// cleanup
if (prev === undefined) {
delete process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
} else {
process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB = prev
}
}
})

it("records ripgrep failures without breaking pattern generation", async () => {
vi.mocked(fileExistsAtPath).mockResolvedValue(false)
// Force executeRipgrep to throw
vi.mocked(executeRipgrep).mockRejectedValue(new Error("ripgrep failed"))

const result = await getExcludePatternsWithStats(testWorkspacePath)

// No dynamic large files because ripgrep failed
expect(result.stats.largeFilesExcluded).toBe(0)
expect(result.stats.sample.length).toBe(0)
// Error counts should reflect one ripgrep error
expect(result.stats.errorCounts?.ripgrepErrors).toBe(1)
expect(result.stats.errorCounts?.fsStatErrors).toBe(0)
// Base patterns should still include .git/
expect(result.patterns).toContain(".git/")
})

it("counts fs.stat errors for diagnostics", async () => {
vi.mocked(fileExistsAtPath).mockResolvedValue(false)
vi.mocked(executeRipgrep).mockResolvedValue([{ path: "mystery.bin", type: "file", label: "mystery.bin" }])
// Make stat fail
vi.mocked(fs.stat).mockRejectedValue(new Error("stat failure"))

const result = await getExcludePatternsWithStats(testWorkspacePath)

expect(result.stats.largeFilesExcluded).toBe(0)
expect(result.stats.sample.length).toBe(0)
expect(result.stats.errorCounts?.ripgrepErrors).toBe(0)
expect(result.stats.errorCounts?.fsStatErrors).toBe(1)
})
})
})
Loading
Loading