@@ -2,10 +2,20 @@ import fs from "fs/promises"
22import * as path from "path"
33
44import { fileExistsAtPath } from "../../utils/fs"
5- import { executeRipgrepForFiles , executeRipgrep } from "../search/file-search"
5+ import { executeRipgrep } from "../search/file-search"
66
const DEFAULT_LARGE_FILE_THRESHOLD_BYTES = 10 * 1024 * 1024 // 10 MB

/**
 * Resolves the large-file threshold, in bytes.
 *
 * The threshold can be overridden with the ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
 * environment variable, expressed in megabytes (fractions allowed), e.g.
 * ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB=25.
 *
 * @returns The override converted to whole bytes, or
 * DEFAULT_LARGE_FILE_THRESHOLD_BYTES when the variable is unset, empty,
 * non-numeric, non-positive, or does not convert to a usable byte count.
 */
function getConfiguredLargeFileThresholdBytes(): number {
	const raw = process.env.ROO_CHECKPOINTS_LARGE_FILE_THRESHOLD_MB
	const megabytes = raw ? Number(raw) : NaN
	if (!Number.isFinite(megabytes) || megabytes <= 0) {
		return DEFAULT_LARGE_FILE_THRESHOLD_BYTES
	}

	const bytes = Math.round(megabytes * 1024 * 1024)
	// A tiny positive value rounds down to 0 bytes, and a huge one overflows to
	// Infinity. A 0-byte threshold would match every file (size >= 0), so treat
	// both as invalid overrides instead of returning them.
	if (!Number.isFinite(bytes) || bytes < 1) {
		return DEFAULT_LARGE_FILE_THRESHOLD_BYTES
	}
	return bytes
}
18+
919// Common code/text extensions that should not be auto-excluded by size
1020const CODE_EXT_ALLOWLIST : Set < string > = new Set < string > ( [
1121 ".ts" ,
@@ -273,62 +283,71 @@ const getGameEnginePatterns = () => [
273283 */
async function getLargeFileAutoExcludePatterns(
	workspacePath: string,
	thresholdBytes: number,
	lfsPatterns: string[] = [],
): Promise<{ patterns: string[]; errorCounts: { ripgrepErrors: number; fsStatErrors: number } }> {
	// Lists workspace files with ripgrep, stats each non-allowlisted file, and
	// returns the de-duplicated paths whose size is >= thresholdBytes together
	// with counts of the failures that were tolerated along the way.

	// Build ripgrep args with common ignores
	const args = [
		"--files",
		"--follow",
		"--hidden",
		"-g",
		"!**/node_modules/**",
		"-g",
		"!**/.git/**",
		"-g",
		"!**/out/**",
		"-g",
		"!**/dist/**",
	]

	// Pre-filter git-lfs patterns at ripgrep level: a plain pattern becomes a
	// "!pattern" exclusion glob, while an already-negated pattern has its "!" removed.
	for (const pattern of lfsPatterns) {
		const rgPattern = pattern.startsWith("!") ? pattern.substring(1) : `!${pattern}`
		args.push("-g", rgPattern)
	}

	args.push(workspacePath)

	let items: Array<{ path: string; type: string }> = []
	let ripgrepErrors = 0
	let fsStatErrors = 0

	try {
		const rgResult = await executeRipgrep({ args, workspacePath, limit: 50000 })
		items = Array.isArray(rgResult) ? rgResult : []
	} catch {
		// If ripgrep fails, record the error and continue with no items so that
		// checkpoint creation is not broken by the scan.
		ripgrepErrors = 1
	}

	// A Set keeps first-seen order and drops duplicate paths.
	const large = new Set<string>()

	for (const item of items) {
		if (item.type !== "file") continue

		const rel = item.path
		const ext = path.extname(rel).toLowerCase()

		// Keep code/text files even if large
		if (CODE_EXT_ALLOWLIST.has(ext)) continue

		try {
			const stat = await fs.stat(path.join(workspacePath, rel))
			if (stat.size >= thresholdBytes) {
				// Normalize to forward slashes for git exclude
				large.add(rel.replace(/\\/g, "/"))
			}
		} catch {
			// Count stat errors for diagnostics; a single unreadable file must
			// not abort the scan.
			fsStatErrors++
		}
	}

	return {
		patterns: Array.from(large),
		errorCounts: { ripgrepErrors, fsStatErrors },
	}
}
334353
@@ -337,7 +356,12 @@ async function getLargeFileAutoExcludePatterns(
337356 */
338357export async function getExcludePatternsWithStats ( workspacePath : string ) : Promise < {
339358 patterns : string [ ]
340- stats : { largeFilesExcluded : number ; thresholdBytes : number ; sample : string [ ] }
359+ stats : {
360+ largeFilesExcluded : number
361+ thresholdBytes : number
362+ sample : string [ ]
363+ errorCounts ?: { ripgrepErrors : number ; fsStatErrors : number }
364+ }
341365} > {
342366 // Get git-lfs patterns first
343367 const lfsPatterns = await getLfsPatterns ( workspacePath )
@@ -356,10 +380,13 @@ export async function getExcludePatternsWithStats(workspacePath: string): Promis
356380 ...lfsPatterns ,
357381 ]
358382
383+ // Determine threshold (env override supported)
384+ const thresholdBytes = getConfiguredLargeFileThresholdBytes ( )
385+
359386 // Pass lfs patterns to the large file scanner to pre-filter them
360- const dynamicLarge = await getLargeFileAutoExcludePatterns (
387+ const { patterns : dynamicLarge , errorCounts } = await getLargeFileAutoExcludePatterns (
361388 workspacePath ,
362- DEFAULT_LARGE_FILE_THRESHOLD_BYTES ,
389+ thresholdBytes ,
363390 lfsPatterns ,
364391 )
365392
@@ -369,8 +396,9 @@ export async function getExcludePatternsWithStats(workspacePath: string): Promis
369396 patterns,
370397 stats : {
371398 largeFilesExcluded : dynamicLarge . length ,
372- thresholdBytes : DEFAULT_LARGE_FILE_THRESHOLD_BYTES ,
399+ thresholdBytes,
373400 sample : dynamicLarge . slice ( 0 , 10 ) ,
401+ errorCounts,
374402 } ,
375403 }
376404}
0 commit comments