@@ -293,14 +293,13 @@ export const defaultExcludePatterns = [
293293]
294294
/**
 * Builds the exclude pattern string used when searching for files.
 *
 * The global exclude patterns are always included; `defaultExcludePatterns`
 * are appended only when requested.
 *
 * @param useDefaults whether to append `defaultExcludePatterns` (default true)
 * @returns the combined patterns, formatted by `excludePatternsAsString`
 */
export function getExcludePattern(useDefaults: boolean = true) {
    const globalPatterns = getGlobalExcludePatterns()
    const combined = useDefaults ? [...globalPatterns, ...defaultExcludePatterns] : [...globalPatterns]

    return excludePatternsAsString(combined)
}
305304
306305function getGlobalExcludePatterns ( ) {
@@ -335,10 +334,19 @@ export type CollectFilesResultItem = {
335334 relativeFilePath : string
336335 fileUri : vscode . Uri
337336 fileContent : string
337+ fileSizeBytes : number
338338 zipFilePath : string
339339}
340340export type CollectFilesFilter = ( relativePath : string ) => boolean // predicate over a file's relative path; returns true if the file should be filtered out
341-
/** Options accepted by `collectFiles`. Every field is optional. */
interface CollectFilesOptions {
    // --- size limits ---

    /**
     * Limit on the summed size of the collected files.
     * Defaults to `maxRepoSizeBytes` (200 MB default).
     */
    maxTotalSizeBytes?: number
    /** Files larger than this are skipped. Defaults to 10 MB. */
    maxFileSizeBytes?: number
    /**
     * When true (the default), `collectFiles` throws a `ContentLengthError`
     * once adding a file would exceed the total size limit. When false, the
     * total size limit is not enforced.
     */
    failOnLimit?: boolean

    // --- result shape ---

    /**
     * When true (the default), each file is read and its content is returned
     * as `fileContent`. When false, files are not read and results carry no
     * `fileContent`.
     */
    includeContent?: boolean

    // --- filtering ---

    /** Defaults to true. Presumably toggles .gitignore-based exclusion; confirm against `collectFiles`. */
    excludeByGitIgnore?: boolean
    /** Patterns added on top of the global exclude patterns. Defaults to `defaultExcludePatterns`. */
    excludePatterns?: string[]
    /** Custom predicate; returns true if a file should be filtered out. */
    filterFn?: CollectFilesFilter
}
342350/**
343351 * search files in sourcePaths and collect them using filtering options
344352 * @param sourcePaths the paths where collection starts
@@ -349,48 +357,40 @@ export type CollectFilesFilter = (relativePath: string) => boolean // returns tr
349357export async function collectFiles (
350358 sourcePaths : string [ ] ,
351359 workspaceFolders : CurrentWsFolders ,
352- options ?: {
353- maxSizeBytes ?: number // 200 MB default
354- excludeByGitIgnore ?: boolean // default true
355- excludePatterns ?: string [ ] // default defaultExcludePatterns
356- filterFn ?: CollectFilesFilter
357- }
358- ) : Promise < CollectFilesResultItem [ ] > {
359- const storage : Awaited < CollectFilesResultItem [ ] > = [ ]
360-
360+ options ?: ( CollectFilesOptions & { includeContent : true } ) | Omit < CollectFilesOptions , 'includeContent' >
361+ ) : Promise < CollectFilesResultItem [ ] >
362+ export async function collectFiles (
363+ sourcePaths : string [ ] ,
364+ workspaceFolders : CurrentWsFolders ,
365+ options ?: CollectFilesOptions & { includeContent : false }
366+ ) : Promise < Omit < CollectFilesResultItem , 'fileContent' > [ ] >
367+ export async function collectFiles (
368+ sourcePaths : string [ ] ,
369+ workspaceFolders : CurrentWsFolders ,
370+ options ?: CollectFilesOptions
371+ ) {
361372 const workspaceFoldersMapping = getWorkspaceFoldersByPrefixes ( workspaceFolders )
362373 const workspaceToPrefix = new Map < vscode . WorkspaceFolder , string > (
363374 workspaceFoldersMapping === undefined
364375 ? [ [ workspaceFolders [ 0 ] , '' ] ]
365376 : Object . entries ( workspaceFoldersMapping ) . map ( ( value ) => [ value [ 1 ] , value [ 0 ] ] )
366377 )
367- const prefixWithFolderPrefix = ( folder : vscode . WorkspaceFolder , path : string ) => {
368- const prefix = workspaceToPrefix . get ( folder )
369- /**
370- * collects all files that are marked as source
371- * @param sourcePaths the paths where collection starts
372- * @param workspaceFolders the current workspace folders opened
373- * @param respectGitIgnore whether to respect gitignore file
374- * @returns all matched files
375- */
376- if ( prefix === undefined ) {
377- throw new ToolkitError ( `Failed to find prefix for workspace folder ${ folder . name } ` )
378- }
379- return prefix === '' ? path : `${ prefix } /${ path } `
380- }
381-
382- let totalSizeBytes = 0
383378
379+ const includeContent = options ?. includeContent ?? true
380+ const maxFileSizeBytes = options ?. maxFileSizeBytes ?? 1024 * 1024 * 10
384381 const excludeByGitIgnore = options ?. excludeByGitIgnore ?? true
382+ const failOnLimit = options ?. failOnLimit ?? true
385383 const inputExcludePatterns = options ?. excludePatterns ?? defaultExcludePatterns
386- const maxSizeBytes = options ?. maxSizeBytes ?? maxRepoSizeBytes
384+ const maxSizeBytes = options ?. maxTotalSizeBytes ?? maxRepoSizeBytes
387385
388386 const excludePatterns = [ ...getGlobalExcludePatterns ( ) ]
389387 if ( inputExcludePatterns . length ) {
390388 excludePatterns . push ( ...inputExcludePatterns )
391389 }
392- const excludePatternFilter = excludePatternsAsString ( excludePatterns )
393390
391+ let totalSizeBytes = 0
392+ const storage = [ ]
393+ const excludePatternFilter = excludePatternsAsString ( excludePatterns )
394394 for ( const rootPath of sourcePaths ) {
395395 const allFiles = await vscode . workspace . findFiles (
396396 new vscode . RelativePattern ( rootPath , '**' ) ,
@@ -410,31 +410,56 @@ export async function collectFiles(
410410 }
411411
412412 const fileStat = await fs . stat ( file )
413- if ( totalSizeBytes + fileStat . size > maxSizeBytes ) {
413+ if ( failOnLimit && totalSizeBytes + fileStat . size > maxSizeBytes ) {
414414 throw new ToolkitError (
415415 'The project you have selected for source code is too large to use as context. Please select a different folder to use' ,
416416 { code : 'ContentLengthError' }
417417 )
418418 }
419419
420- const fileContent = await readFile ( file )
421-
422- if ( fileContent === undefined ) {
420+ if ( fileStat . size > maxFileSizeBytes ) {
423421 continue
424422 }
425423
426- // Now that we've read the file, increase our usage
427- totalSizeBytes += fileStat . size
428- storage . push ( {
424+ const result = {
429425 workspaceFolder : relativePath . workspaceFolder ,
430426 relativeFilePath : relativePath . relativePath ,
431427 fileUri : file ,
432- fileContent : fileContent ,
428+ fileSizeBytes : fileStat . size ,
433429 zipFilePath : prefixWithFolderPrefix ( relativePath . workspaceFolder , relativePath . relativePath ) ,
434- } )
430+ }
431+ if ( includeContent ) {
432+ const content = await readFile ( file )
433+ if ( content === undefined ) {
434+ continue
435+ }
436+ totalSizeBytes += fileStat . size
437+ storage . push ( {
438+ ...result ,
439+ fileContent : content ,
440+ } )
441+ } else {
442+ totalSizeBytes += fileStat . size
443+ storage . push ( result )
444+ }
435445 }
436446 }
437447 return storage
448+
/**
 * Prepends the prefix registered for a workspace folder to a path inside it.
 * The result is what `collectFiles` stores as an item's `zipFilePath`.
 *
 * @param folder the workspace folder the file belongs to
 * @param path the file's path relative to that folder
 * @returns `path` unchanged when the folder's prefix is empty, otherwise `<prefix>/<path>`
 * @throws ToolkitError when no prefix is registered for `folder`
 */
function prefixWithFolderPrefix(folder: vscode.WorkspaceFolder, path: string) {
    const folderPrefix = workspaceToPrefix.get(folder)

    if (folderPrefix === undefined) {
        throw new ToolkitError(`Failed to find prefix for workspace folder ${folder.name}`)
    }
    if (folderPrefix === '') {
        return path
    }
    return `${folderPrefix}/${path}`
}
438463}
439464
440465const readFile = async ( file : vscode . Uri ) => {
@@ -576,7 +601,7 @@ export function getWorkspaceFoldersByPrefixes(
576601 * 2. Must not be auto generated code
577602 * 3. Must not be within gitignore
578603 * 4. Ranked by priority.
579- * 5. Select files within maxSize limit.
604+ * 5. Select files within maxFileSize limit.
580605 * This function does not read the actual file content or compress the files into a zip.
581606 * TODO: Move this to LSP
582607 * @param sourcePaths the paths where collection starts
@@ -590,65 +615,20 @@ export async function collectFilesForIndex(
590615 respectGitIgnore : boolean = true ,
591616 maxSize = 250 * 1024 * 1024 // 250 MB,
592617 // make this configurable, so we can test it
593- ) : Promise <
594- {
595- workspaceFolder : vscode . WorkspaceFolder
596- relativeFilePath : string
597- fileUri : vscode . Uri
598- fileSizeBytes : number
599- } [ ]
600- > {
601- const storage : Awaited < ReturnType < typeof collectFilesForIndex > > = [ ]
602-
603- const isLanguageSupported = ( filename : string ) => {
604- const k =
605- / \. ( j s | t s | j a v a | p y | r b | c p p | t s x | j s x | c c | c | c s | v b | p l | r | m | h s | m t s | m j s | h | c l j | d a r t | g r o o v y | l u a | r b | j l | i p y n b | h t m l | j s o n | c s s | m d | p h p | s w i f t | r s | s c a l a | y a m l | t f | s q l | s h | g o | y m l | k t | s m i t h y | c o n f i g | k t s | g r a d l e | c f g | x m l | v u e ) $ / i
606- return k . test ( filename ) || filename . endsWith ( 'Config' )
607- }
608-
609- const isBuildOrBin = ( filePath : string ) => {
610- const k = / [ / \\ ] ( b i n | b u i l d | n o d e _ m o d u l e s | e n v | \. i d e a | \. v e n v | v e n v ) [ / \\ ] / i
611- return k . test ( filePath )
612- }
613-
614- let totalSizeBytes = 0
615- for ( const rootPath of sourcePaths ) {
616- const allFiles = await vscode . workspace . findFiles (
617- new vscode . RelativePattern ( rootPath , '**' ) ,
618- getExcludePattern ( )
619- )
620- const files = respectGitIgnore ? await filterOutGitignoredFiles ( rootPath , allFiles ) : allFiles
621-
622- for ( const file of files ) {
623- if ( ! isLanguageSupported ( file . fsPath ) ) {
624- continue
625- }
626- if ( isBuildOrBin ( file . fsPath ) ) {
627- continue
628- }
629- const relativePath = getWorkspaceRelativePath ( file . fsPath , { workspaceFolders } )
630- if ( ! relativePath ) {
631- continue
632- }
633-
634- const fileStat = await fs . stat ( file )
635- // ignore single file over 10 MB
636- if ( fileStat . size > 10 * 1024 * 1024 ) {
637- continue
638- }
639- storage . push ( {
640- workspaceFolder : relativePath . workspaceFolder ,
641- relativeFilePath : relativePath . relativePath ,
642- fileUri : file ,
643- fileSizeBytes : fileStat . size ,
644- } )
645- }
646- }
618+ ) {
619+ const storage = await collectFiles ( sourcePaths , workspaceFolders , {
620+ maxFileSizeBytes : 10 * 1024 * 1024 ,
621+ includeContent : false ,
622+ failOnLimit : false ,
623+ excludeByGitIgnore : respectGitIgnore ,
624+ filterFn : ( rp ) => ! isLanguageSupported ( rp ) || isBuildOrBin ( rp ) ,
625+ } )
647626 // prioritize upper level files
648627 storage . sort ( ( a , b ) => a . fileUri . fsPath . length - b . fileUri . fsPath . length )
649628
650629 const maxSizeBytes = Math . min ( maxSize , os . freemem ( ) / 2 )
651630
631+ let totalSizeBytes = 0
652632 let i = 0
653633 for ( i = 0 ; i < storage . length ; i += 1 ) {
654634 totalSizeBytes += storage [ i ] . fileSizeBytes
@@ -658,6 +638,17 @@ export async function collectFilesForIndex(
658638 }
659639 // pick top 100k files below size limit
660640 return storage . slice ( 0 , Math . min ( 100000 , i ) )
641+
/**
 * Tells whether a file name looks like a supported source or configuration file.
 *
 * A name is supported when it ends with one of the listed extensions
 * (case-insensitive) or ends with the literal suffix `Config` (case-sensitive).
 *
 * @param filename file name or path to test; only its ending matters
 * @returns true when the name matches a supported extension or the `Config` suffix
 */
function isLanguageSupported(filename: string): boolean {
    // Same extension set as before; the duplicated `rb` alternative was dropped.
    // The regex stays inside the function on purpose: this declaration may sit
    // after a `return` in its enclosing function, where a separate `const`
    // would never be initialised.
    const supportedExtension =
        /\.(js|ts|java|py|rb|cpp|tsx|jsx|cc|c|cs|vb|pl|r|m|hs|mts|mjs|h|clj|dart|groovy|lua|jl|ipynb|html|json|css|md|php|swift|rs|scala|yaml|tf|sql|sh|go|yml|kt|smithy|config|kts|gradle|cfg|xml|vue)$/i
    return supportedExtension.test(filename) || filename.endsWith('Config')
}
647+
/**
 * Tells whether a path goes through a build-output, dependency, or
 * virtual-environment directory (`bin`, `build`, `node_modules`, `env`,
 * `.idea`, `.venv`, `venv`), matched case-insensitively.
 *
 * The directory name must be a whole path segment followed by a separator.
 * It may be preceded by a separator or sit at the very start of the path.
 * The start-of-path case matters because this predicate is used as a
 * `CollectFilesFilter`, which receives a relative path. Such a path
 * presumably has no leading separator, so `build/x.ts` would otherwise be missed.
 *
 * @param filePath absolute or relative path, using `/` or `\` separators
 * @returns true when one of the listed directories is a segment of the path
 */
function isBuildOrBin(filePath: string): boolean {
    const excludedDirSegment = /(?:^|[/\\])(bin|build|node_modules|env|\.idea|\.venv|venv)[/\\]/i
    return excludedDirSegment.test(filePath)
}
661652}
662653
663654/**
0 commit comments