@@ -293,14 +293,13 @@ export const defaultExcludePatterns = [
293
293
]
294
294
295
295
export function getExcludePattern ( useDefaults : boolean = true ) {
296
- const globAlwaysExcludedDirs = getGlobalExcludePatterns ( )
297
- const allPatterns = [ ...globAlwaysExcludedDirs ]
296
+ const patterns = [ ...getGlobalExcludePatterns ( ) ]
298
297
299
298
if ( useDefaults ) {
300
- allPatterns . push ( ...defaultExcludePatterns )
299
+ patterns . push ( ...defaultExcludePatterns )
301
300
}
302
301
303
- return excludePatternsAsString ( allPatterns )
302
+ return excludePatternsAsString ( patterns )
304
303
}
305
304
306
305
function getGlobalExcludePatterns ( ) {
@@ -335,10 +334,19 @@ export type CollectFilesResultItem = {
335
334
relativeFilePath : string
336
335
fileUri : vscode . Uri
337
336
fileContent : string
337
+ fileSizeBytes : number
338
338
zipFilePath : string
339
339
}
340
340
export type CollectFilesFilter = ( relativePath : string ) => boolean // returns true if file should be filtered out
341
-
341
+ interface CollectFilesOptions {
342
+ maxTotalSizeBytes ?: number // 200 MB default
343
+ maxFileSizeBytes ?: number // 10 MB default
344
+ includeContent ?: boolean // default true
345
+ failOnLimit ?: boolean // default true
346
+ excludeByGitIgnore ?: boolean // default true
347
+ excludePatterns ?: string [ ] // default defaultExcludePatterns
348
+ filterFn ?: CollectFilesFilter
349
+ }
342
350
/**
343
351
* search files in sourcePaths and collect them using filtering options
344
352
* @param sourcePaths the paths where collection starts
@@ -349,48 +357,40 @@ export type CollectFilesFilter = (relativePath: string) => boolean // returns tr
349
357
export async function collectFiles (
350
358
sourcePaths : string [ ] ,
351
359
workspaceFolders : CurrentWsFolders ,
352
- options ?: {
353
- maxSizeBytes ?: number // 200 MB default
354
- excludeByGitIgnore ?: boolean // default true
355
- excludePatterns ?: string [ ] // default defaultExcludePatterns
356
- filterFn ?: CollectFilesFilter
357
- }
358
- ) : Promise < CollectFilesResultItem [ ] > {
359
- const storage : Awaited < CollectFilesResultItem [ ] > = [ ]
360
-
360
+ options ?: ( CollectFilesOptions & { includeContent : true } ) | Omit < CollectFilesOptions , 'includeContent' >
361
+ ) : Promise < CollectFilesResultItem [ ] >
362
+ export async function collectFiles (
363
+ sourcePaths : string [ ] ,
364
+ workspaceFolders : CurrentWsFolders ,
365
+ options ?: CollectFilesOptions & { includeContent : false }
366
+ ) : Promise < Omit < CollectFilesResultItem , 'fileContent' > [ ] >
367
+ export async function collectFiles (
368
+ sourcePaths : string [ ] ,
369
+ workspaceFolders : CurrentWsFolders ,
370
+ options ?: CollectFilesOptions
371
+ ) {
361
372
const workspaceFoldersMapping = getWorkspaceFoldersByPrefixes ( workspaceFolders )
362
373
const workspaceToPrefix = new Map < vscode . WorkspaceFolder , string > (
363
374
workspaceFoldersMapping === undefined
364
375
? [ [ workspaceFolders [ 0 ] , '' ] ]
365
376
: Object . entries ( workspaceFoldersMapping ) . map ( ( value ) => [ value [ 1 ] , value [ 0 ] ] )
366
377
)
367
- const prefixWithFolderPrefix = ( folder : vscode . WorkspaceFolder , path : string ) => {
368
- const prefix = workspaceToPrefix . get ( folder )
369
- /**
370
- * collects all files that are marked as source
371
- * @param sourcePaths the paths where collection starts
372
- * @param workspaceFolders the current workspace folders opened
373
- * @param respectGitIgnore whether to respect gitignore file
374
- * @returns all matched files
375
- */
376
- if ( prefix === undefined ) {
377
- throw new ToolkitError ( `Failed to find prefix for workspace folder ${ folder . name } ` )
378
- }
379
- return prefix === '' ? path : `${ prefix } /${ path } `
380
- }
381
-
382
- let totalSizeBytes = 0
383
378
379
+ const includeContent = options ?. includeContent ?? true
380
+ const maxFileSizeBytes = options ?. maxFileSizeBytes ?? 1024 * 1024 * 10
384
381
const excludeByGitIgnore = options ?. excludeByGitIgnore ?? true
382
+ const failOnLimit = options ?. failOnLimit ?? true
385
383
const inputExcludePatterns = options ?. excludePatterns ?? defaultExcludePatterns
386
- const maxSizeBytes = options ?. maxSizeBytes ?? maxRepoSizeBytes
384
+ const maxSizeBytes = options ?. maxTotalSizeBytes ?? maxRepoSizeBytes
387
385
388
386
const excludePatterns = [ ...getGlobalExcludePatterns ( ) ]
389
387
if ( inputExcludePatterns . length ) {
390
388
excludePatterns . push ( ...inputExcludePatterns )
391
389
}
392
- const excludePatternFilter = excludePatternsAsString ( excludePatterns )
393
390
391
+ let totalSizeBytes = 0
392
+ const storage = [ ]
393
+ const excludePatternFilter = excludePatternsAsString ( excludePatterns )
394
394
for ( const rootPath of sourcePaths ) {
395
395
const allFiles = await vscode . workspace . findFiles (
396
396
new vscode . RelativePattern ( rootPath , '**' ) ,
@@ -410,31 +410,56 @@ export async function collectFiles(
410
410
}
411
411
412
412
const fileStat = await fs . stat ( file )
413
- if ( totalSizeBytes + fileStat . size > maxSizeBytes ) {
413
+ if ( failOnLimit && totalSizeBytes + fileStat . size > maxSizeBytes ) {
414
414
throw new ToolkitError (
415
415
'The project you have selected for source code is too large to use as context. Please select a different folder to use' ,
416
416
{ code : 'ContentLengthError' }
417
417
)
418
418
}
419
419
420
- const fileContent = await readFile ( file )
421
-
422
- if ( fileContent === undefined ) {
420
+ if ( fileStat . size > maxFileSizeBytes ) {
423
421
continue
424
422
}
425
423
426
- // Now that we've read the file, increase our usage
427
- totalSizeBytes += fileStat . size
428
- storage . push ( {
424
+ const result = {
429
425
workspaceFolder : relativePath . workspaceFolder ,
430
426
relativeFilePath : relativePath . relativePath ,
431
427
fileUri : file ,
432
- fileContent : fileContent ,
428
+ fileSizeBytes : fileStat . size ,
433
429
zipFilePath : prefixWithFolderPrefix ( relativePath . workspaceFolder , relativePath . relativePath ) ,
434
- } )
430
+ }
431
+ if ( includeContent ) {
432
+ const content = await readFile ( file )
433
+ if ( content === undefined ) {
434
+ continue
435
+ }
436
+ totalSizeBytes += fileStat . size
437
+ storage . push ( {
438
+ ...result ,
439
+ fileContent : content ,
440
+ } )
441
+ } else {
442
+ totalSizeBytes += fileStat . size
443
+ storage . push ( result )
444
+ }
435
445
}
436
446
}
437
447
return storage
448
+
449
+ function prefixWithFolderPrefix ( folder : vscode . WorkspaceFolder , path : string ) {
450
+ const prefix = workspaceToPrefix . get ( folder )
451
+ /**
452
+ * collects all files that are marked as source
453
+ * @param sourcePaths the paths where collection starts
454
+ * @param workspaceFolders the current workspace folders opened
455
+ * @param respectGitIgnore whether to respect gitignore file
456
+ * @returns all matched files
457
+ */
458
+ if ( prefix === undefined ) {
459
+ throw new ToolkitError ( `Failed to find prefix for workspace folder ${ folder . name } ` )
460
+ }
461
+ return prefix === '' ? path : `${ prefix } /${ path } `
462
+ }
438
463
}
439
464
440
465
const readFile = async ( file : vscode . Uri ) => {
@@ -576,7 +601,7 @@ export function getWorkspaceFoldersByPrefixes(
576
601
* 2. Must not be auto generated code
577
602
* 3. Must not be within gitignore
578
603
* 4. Ranked by priority.
579
- * 5. Select files within maxSize limit.
604
+ * 5. Select files within maxFileSize limit.
580
605
* This function do not read the actual file content or compress them into a zip.
581
606
* TODO: Move this to LSP
582
607
* @param sourcePaths the paths where collection starts
@@ -590,65 +615,20 @@ export async function collectFilesForIndex(
590
615
respectGitIgnore : boolean = true ,
591
616
maxSize = 250 * 1024 * 1024 // 250 MB,
592
617
// make this configurable, so we can test it
593
- ) : Promise <
594
- {
595
- workspaceFolder : vscode . WorkspaceFolder
596
- relativeFilePath : string
597
- fileUri : vscode . Uri
598
- fileSizeBytes : number
599
- } [ ]
600
- > {
601
- const storage : Awaited < ReturnType < typeof collectFilesForIndex > > = [ ]
602
-
603
- const isLanguageSupported = ( filename : string ) => {
604
- const k =
605
- / \. ( j s | t s | j a v a | p y | r b | c p p | t s x | j s x | c c | c | c s | v b | p l | r | m | h s | m t s | m j s | h | c l j | d a r t | g r o o v y | l u a | r b | j l | i p y n b | h t m l | j s o n | c s s | m d | p h p | s w i f t | r s | s c a l a | y a m l | t f | s q l | s h | g o | y m l | k t | s m i t h y | c o n f i g | k t s | g r a d l e | c f g | x m l | v u e ) $ / i
606
- return k . test ( filename ) || filename . endsWith ( 'Config' )
607
- }
608
-
609
- const isBuildOrBin = ( filePath : string ) => {
610
- const k = / [ / \\ ] ( b i n | b u i l d | n o d e _ m o d u l e s | e n v | \. i d e a | \. v e n v | v e n v ) [ / \\ ] / i
611
- return k . test ( filePath )
612
- }
613
-
614
- let totalSizeBytes = 0
615
- for ( const rootPath of sourcePaths ) {
616
- const allFiles = await vscode . workspace . findFiles (
617
- new vscode . RelativePattern ( rootPath , '**' ) ,
618
- getExcludePattern ( )
619
- )
620
- const files = respectGitIgnore ? await filterOutGitignoredFiles ( rootPath , allFiles ) : allFiles
621
-
622
- for ( const file of files ) {
623
- if ( ! isLanguageSupported ( file . fsPath ) ) {
624
- continue
625
- }
626
- if ( isBuildOrBin ( file . fsPath ) ) {
627
- continue
628
- }
629
- const relativePath = getWorkspaceRelativePath ( file . fsPath , { workspaceFolders } )
630
- if ( ! relativePath ) {
631
- continue
632
- }
633
-
634
- const fileStat = await fs . stat ( file )
635
- // ignore single file over 10 MB
636
- if ( fileStat . size > 10 * 1024 * 1024 ) {
637
- continue
638
- }
639
- storage . push ( {
640
- workspaceFolder : relativePath . workspaceFolder ,
641
- relativeFilePath : relativePath . relativePath ,
642
- fileUri : file ,
643
- fileSizeBytes : fileStat . size ,
644
- } )
645
- }
646
- }
618
+ ) {
619
+ const storage = await collectFiles ( sourcePaths , workspaceFolders , {
620
+ maxFileSizeBytes : 10 * 1024 * 1024 ,
621
+ includeContent : false ,
622
+ failOnLimit : false ,
623
+ excludeByGitIgnore : respectGitIgnore ,
624
+ filterFn : ( rp ) => ! isLanguageSupported ( rp ) || isBuildOrBin ( rp ) ,
625
+ } )
647
626
// prioritize upper level files
648
627
storage . sort ( ( a , b ) => a . fileUri . fsPath . length - b . fileUri . fsPath . length )
649
628
650
629
const maxSizeBytes = Math . min ( maxSize , os . freemem ( ) / 2 )
651
630
631
+ let totalSizeBytes = 0
652
632
let i = 0
653
633
for ( i = 0 ; i < storage . length ; i += 1 ) {
654
634
totalSizeBytes += storage [ i ] . fileSizeBytes
@@ -658,6 +638,17 @@ export async function collectFilesForIndex(
658
638
}
659
639
// pick top 100k files below size limit
660
640
return storage . slice ( 0 , Math . min ( 100000 , i ) )
641
+
642
+ function isLanguageSupported ( filename : string ) {
643
+ const k =
644
+ / \. ( j s | t s | j a v a | p y | r b | c p p | t s x | j s x | c c | c | c s | v b | p l | r | m | h s | m t s | m j s | h | c l j | d a r t | g r o o v y | l u a | r b | j l | i p y n b | h t m l | j s o n | c s s | m d | p h p | s w i f t | r s | s c a l a | y a m l | t f | s q l | s h | g o | y m l | k t | s m i t h y | c o n f i g | k t s | g r a d l e | c f g | x m l | v u e ) $ / i
645
+ return k . test ( filename ) || filename . endsWith ( 'Config' )
646
+ }
647
+
648
+ function isBuildOrBin ( filePath : string ) {
649
+ const k = / [ / \\ ] ( b i n | b u i l d | n o d e _ m o d u l e s | e n v | \. i d e a | \. v e n v | v e n v ) [ / \\ ] / i
650
+ return k . test ( filePath )
651
+ }
661
652
}
662
653
663
654
/**
0 commit comments