Skip to content

Commit 8ca0c46

Browse files
committed
checkpoints: make large-file threshold configurable via ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB; improve error reporting for auto-exclude (ripgrep/fs.stat); log diagnostics in ShadowCheckpointService; add tests
1 parent 038d0d2 commit 8ca0c46

File tree

3 files changed

+161
-52
lines changed

3 files changed

+161
-52
lines changed

src/services/checkpoints/ShadowCheckpointService.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,19 @@ export abstract class ShadowCheckpointService extends EventEmitter {
142142
const { patterns, stats } = await getExcludePatternsWithStats(this.workspaceDir)
143143
await fs.writeFile(path.join(this.dotGitDir, "info", "exclude"), patterns.join("\n"))
144144

145+
const mb = Math.round(stats.thresholdBytes / (1024 * 1024))
146+
145147
if (stats?.largeFilesExcluded && stats.largeFilesExcluded > 0) {
146-
const mb = Math.round(stats.thresholdBytes / (1024 * 1024))
147148
this.log(
148149
`[${this.constructor.name}#writeExcludeFile] auto-excluding ${stats.largeFilesExcluded} large files (>= ${mb}MB) from checkpoints. Sample: ${stats.sample.join(", ")}`,
149150
)
150151
}
152+
153+
if (stats?.errorCounts && (stats.errorCounts.ripgrepErrors > 0 || stats.errorCounts.fsStatErrors > 0)) {
154+
this.log(
155+
`[${this.constructor.name}#writeExcludeFile] auto-exclude encountered errors (ripgrepErrors=${stats.errorCounts.ripgrepErrors}, fsStatErrors=${stats.errorCounts.fsStatErrors}). Check environment and filesystem permissions.`,
156+
)
157+
}
151158
}
152159

153160
private async stageAll(git: SimpleGit) {

src/services/checkpoints/__tests__/excludes.spec.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,4 +264,78 @@ readme.md text
264264
expect(result.stats.sample).not.toContain("big.ts")
265265
})
266266
})
267+
268+
describe("configurable threshold and error reporting", () => {
269+
it("respects ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB override", async () => {
270+
// Ensure no LFS patterns
271+
vi.mocked(fileExistsAtPath).mockResolvedValue(false)
272+
273+
// Set threshold to 1 MB
274+
const prev = process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
275+
process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB = "1"
276+
277+
try {
278+
// Mock file listing
279+
vi.mocked(executeRipgrep).mockResolvedValue([
280+
{ path: "large.bin", type: "file", label: "large.bin" },
281+
{ path: "code.js", type: "file", label: "code.js" },
282+
])
283+
284+
// Mock sizes: 2MB for large.bin, 2MB for code.js (but code is allowlisted)
285+
vi.mocked(fs.stat).mockImplementation(async (p) => {
286+
const s = p.toString()
287+
if (s.includes("large.bin") || s.includes("code.js")) {
288+
return { size: 2 * 1024 * 1024 } as any
289+
}
290+
return { size: 1024 } as any
291+
})
292+
293+
const result = await getExcludePatternsWithStats(testWorkspacePath)
294+
295+
expect(result.stats.thresholdBytes).toBe(1 * 1024 * 1024)
296+
expect(result.stats.largeFilesExcluded).toBe(1)
297+
expect(result.stats.sample).toContain("large.bin")
298+
// code.js should never be excluded even if large
299+
expect(result.stats.sample).not.toContain("code.js")
300+
} finally {
301+
// cleanup
302+
if (prev === undefined) {
303+
delete process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
304+
} else {
305+
process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB = prev
306+
}
307+
}
308+
})
309+
310+
it("records ripgrep failures without breaking pattern generation", async () => {
311+
vi.mocked(fileExistsAtPath).mockResolvedValue(false)
312+
// Force executeRipgrep to throw
313+
vi.mocked(executeRipgrep).mockRejectedValue(new Error("ripgrep failed"))
314+
315+
const result = await getExcludePatternsWithStats(testWorkspacePath)
316+
317+
// No dynamic large files because ripgrep failed
318+
expect(result.stats.largeFilesExcluded).toBe(0)
319+
expect(result.stats.sample.length).toBe(0)
320+
// Error counts should reflect one ripgrep error
321+
expect(result.stats.errorCounts?.ripgrepErrors).toBe(1)
322+
expect(result.stats.errorCounts?.fsStatErrors).toBe(0)
323+
// Base patterns should still include .git/
324+
expect(result.patterns).toContain(".git/")
325+
})
326+
327+
it("counts fs.stat errors for diagnostics", async () => {
328+
vi.mocked(fileExistsAtPath).mockResolvedValue(false)
329+
vi.mocked(executeRipgrep).mockResolvedValue([{ path: "mystery.bin", type: "file", label: "mystery.bin" }])
330+
// Make stat fail
331+
vi.mocked(fs.stat).mockRejectedValue(new Error("stat failure"))
332+
333+
const result = await getExcludePatternsWithStats(testWorkspacePath)
334+
335+
expect(result.stats.largeFilesExcluded).toBe(0)
336+
expect(result.stats.sample.length).toBe(0)
337+
expect(result.stats.errorCounts?.ripgrepErrors).toBe(0)
338+
expect(result.stats.errorCounts?.fsStatErrors).toBe(1)
339+
})
340+
})
267341
})

src/services/checkpoints/excludes.ts

Lines changed: 79 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,20 @@ import fs from "fs/promises"
22
import * as path from "path"
33

44
import { fileExistsAtPath } from "../../utils/fs"
5-
import { executeRipgrepForFiles, executeRipgrep } from "../search/file-search"
5+
import { executeRipgrep } from "../search/file-search"
66

77
const DEFAULT_LARGE_FILE_THRESHOLD_BYTES = 10 * 1024 * 1024 // 10 MB

/**
 * Resolves the large-file threshold, in bytes, used when auto-excluding files
 * from checkpoints.
 *
 * The value can be overridden with the ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
 * environment variable, expressed in megabytes (fractions allowed), e.g.
 * ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB=25. An unset, empty, non-numeric,
 * non-finite, zero or negative value falls back to
 * DEFAULT_LARGE_FILE_THRESHOLD_BYTES.
 *
 * @returns A finite integer number of bytes in [1, Number.MAX_SAFE_INTEGER].
 */
function getConfiguredLargeFileThresholdBytes(): number {
	const raw = process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
	const megabytes = raw ? Number(raw) : NaN

	if (!Number.isFinite(megabytes) || megabytes <= 0) {
		return DEFAULT_LARGE_FILE_THRESHOLD_BYTES
	}

	const bytes = Math.round(megabytes * 1024 * 1024)

	// Clamp both ends:
	// - a tiny positive override would otherwise round to 0 bytes, and because files
	//   are compared with `size >= threshold`, every file (even empty ones) would match;
	// - a huge override can overflow to Infinity, which would leak into stats and logs.
	return Math.min(Math.max(bytes, 1), Number.MAX_SAFE_INTEGER)
}
18+
919
// Common code/text extensions that should not be auto-excluded by size
1020
const CODE_EXT_ALLOWLIST: Set<string> = new Set<string>([
1121
".ts",
@@ -273,62 +283,71 @@ const getGameEnginePatterns = () => [
273283
*/
274284
async function getLargeFileAutoExcludePatterns(
275285
workspacePath: string,
276-
thresholdBytes: number = DEFAULT_LARGE_FILE_THRESHOLD_BYTES,
286+
thresholdBytes: number,
277287
lfsPatterns: string[] = [],
278-
): Promise<string[]> {
279-
try {
280-
// Create a custom ripgrep execution that excludes git-lfs patterns
281-
const args = [
282-
"--files",
283-
"--follow",
284-
"--hidden",
285-
"-g",
286-
"!**/node_modules/**",
287-
"-g",
288-
"!**/.git/**",
289-
"-g",
290-
"!**/out/**",
291-
"-g",
292-
"!**/dist/**",
293-
]
294-
295-
// Add git-lfs patterns as exclusions to ripgrep
296-
// This pre-filters files before we check their sizes
297-
for (const pattern of lfsPatterns) {
298-
// Convert git-lfs patterns to ripgrep glob patterns
299-
// Git patterns like "*.psd" need to be "!*.psd" for ripgrep
300-
const rgPattern = pattern.startsWith("!") ? pattern.substring(1) : `!${pattern}`
301-
args.push("-g", rgPattern)
302-
}
288+
): Promise<{ patterns: string[]; errorCounts: { ripgrepErrors: number; fsStatErrors: number } }> {
289+
// Build ripgrep args with common ignores
290+
const args = [
291+
"--files",
292+
"--follow",
293+
"--hidden",
294+
"-g",
295+
"!**/node_modules/**",
296+
"-g",
297+
"!**/.git/**",
298+
"-g",
299+
"!**/out/**",
300+
"-g",
301+
"!**/dist/**",
302+
]
303+
304+
// Pre-filter git-lfs patterns at ripgrep level
305+
for (const pattern of lfsPatterns) {
306+
const rgPattern = pattern.startsWith("!") ? pattern.substring(1) : `!${pattern}`
307+
args.push("-g", rgPattern)
308+
}
309+
310+
args.push(workspacePath)
303311

304-
args.push(workspacePath)
312+
let items: Array<{ path: string; type: string }> = []
313+
let ripgrepErrors = 0
314+
let fsStatErrors = 0
305315

306-
const items = await executeRipgrep({ args, workspacePath, limit: 50000 })
307-
const large: string[] = []
316+
try {
317+
const rgResult = await executeRipgrep({ args, workspacePath, limit: 50000 })
318+
items = Array.isArray(rgResult) ? rgResult : []
319+
} catch {
320+
// If ripgrep fails, record error and continue with empty items to avoid breaking checkpoints
321+
ripgrepErrors = 1
322+
items = []
323+
}
308324

309-
for (const item of items) {
310-
if (item.type !== "file") continue
325+
const large: string[] = []
311326

312-
const rel = item.path
313-
const ext = path.extname(rel).toLowerCase()
327+
for (const item of items) {
328+
if ((item as any).type !== "file") continue
314329

315-
// Keep code/text files even if large
316-
if (CODE_EXT_ALLOWLIST.has(ext)) continue
330+
const rel = (item as any).path
331+
const ext = path.extname(rel).toLowerCase()
317332

318-
try {
319-
const stat = await fs.stat(path.join(workspacePath, rel))
320-
if (stat.size >= thresholdBytes) {
321-
// Normalize to forward slashes for git exclude
322-
large.push(rel.replace(/\\/g, "/"))
323-
}
324-
} catch {
325-
// Ignore stat errors for individual files
333+
// Keep code/text files even if large
334+
if (CODE_EXT_ALLOWLIST.has(ext)) continue
335+
336+
try {
337+
const stat = await fs.stat(path.join(workspacePath, rel))
338+
if (stat.size >= thresholdBytes) {
339+
// Normalize to forward slashes for git exclude
340+
large.push(rel.replace(/\\/g, "/"))
326341
}
342+
} catch {
343+
// Count stat errors for diagnostics
344+
fsStatErrors++
327345
}
346+
}
328347

329-
return Array.from(new Set(large))
330-
} catch {
331-
return []
348+
return {
349+
patterns: Array.from(new Set(large)),
350+
errorCounts: { ripgrepErrors, fsStatErrors },
332351
}
333352
}
334353

@@ -337,7 +356,12 @@ async function getLargeFileAutoExcludePatterns(
337356
*/
338357
export async function getExcludePatternsWithStats(workspacePath: string): Promise<{
339358
patterns: string[]
340-
stats: { largeFilesExcluded: number; thresholdBytes: number; sample: string[] }
359+
stats: {
360+
largeFilesExcluded: number
361+
thresholdBytes: number
362+
sample: string[]
363+
errorCounts?: { ripgrepErrors: number; fsStatErrors: number }
364+
}
341365
}> {
342366
// Get git-lfs patterns first
343367
const lfsPatterns = await getLfsPatterns(workspacePath)
@@ -356,10 +380,13 @@ export async function getExcludePatternsWithStats(workspacePath: string): Promis
356380
...lfsPatterns,
357381
]
358382

383+
// Determine threshold (env override supported)
384+
const thresholdBytes = getConfiguredLargeFileThresholdBytes()
385+
359386
// Pass lfs patterns to the large file scanner to pre-filter them
360-
const dynamicLarge = await getLargeFileAutoExcludePatterns(
387+
const { patterns: dynamicLarge, errorCounts } = await getLargeFileAutoExcludePatterns(
361388
workspacePath,
362-
DEFAULT_LARGE_FILE_THRESHOLD_BYTES,
389+
thresholdBytes,
363390
lfsPatterns,
364391
)
365392

@@ -369,8 +396,9 @@ export async function getExcludePatternsWithStats(workspacePath: string): Promis
369396
patterns,
370397
stats: {
371398
largeFilesExcluded: dynamicLarge.length,
372-
thresholdBytes: DEFAULT_LARGE_FILE_THRESHOLD_BYTES,
399+
thresholdBytes,
373400
sample: dynamicLarge.slice(0, 10),
401+
errorCounts,
374402
},
375403
}
376404
}

0 commit comments

Comments
 (0)