@@ -293,14 +293,13 @@ export const defaultExcludePatterns = [
293293] 
294294
/**
 * Combines the global exclude patterns with the optional default patterns.
 *
 * The global patterns from `getGlobalExcludePatterns()` are always included;
 * `defaultExcludePatterns` are appended only when `useDefaults` is true.
 *
 * @param useDefaults whether to append `defaultExcludePatterns` (default: true)
 * @returns the result of `excludePatternsAsString` applied to the combined list
 */
export function getExcludePattern(useDefaults: boolean = true) {
    // Copy so that pushing the defaults never mutates the array returned by the helper.
    const patterns = [...getGlobalExcludePatterns()]

    if (useDefaults) {
        patterns.push(...defaultExcludePatterns)
    }

    return excludePatternsAsString(patterns)
}
305304
306305function  getGlobalExcludePatterns ( )  { 
@@ -335,10 +334,19 @@ export type CollectFilesResultItem = {
335334    relativeFilePath : string 
336335    fileUri : vscode . Uri 
337336    fileContent : string 
337+     fileSizeBytes : number 
338338    zipFilePath : string 
339339} 
/** Predicate over a relative path; returns true if the file should be filtered out. */
export type CollectFilesFilter = (relativePath: string) => boolean

/** Options accepted by `collectFiles`. Every field is optional. */
interface CollectFilesOptions {
    /** Limit on the combined size of the collected files. Defaults to `maxRepoSizeBytes` (noted as 200 MB). */
    maxTotalSizeBytes?: number
    /** Files larger than this are skipped. Defaults to 10 MB. */
    maxFileSizeBytes?: number
    /** When false, file contents are not read and result items carry no `fileContent`. Defaults to true. */
    includeContent?: boolean
    /** When true, exceeding `maxTotalSizeBytes` throws instead of being ignored. Defaults to true. */
    failOnLimit?: boolean
    /** Defaults to true. Presumably drops files matched by .gitignore; the use site is not visible here. */
    excludeByGitIgnore?: boolean
    /** Patterns added on top of the global exclude patterns. Defaults to `defaultExcludePatterns`. */
    excludePatterns?: string[]
    /** Optional extra filter; see `CollectFilesFilter` for the return-value convention. */
    filterFn?: CollectFilesFilter
}
342350/** 
343351 * search files in sourcePaths and collect them using filtering options 
344352 * @param  sourcePaths the paths where collection starts 
@@ -349,48 +357,40 @@ export type CollectFilesFilter = (relativePath: string) => boolean // returns tr
349357export  async  function  collectFiles ( 
350358    sourcePaths : string [ ] , 
351359    workspaceFolders : CurrentWsFolders , 
352-     options ?: { 
353-         maxSizeBytes ?: number  // 200 MB default 
354-         excludeByGitIgnore ?: boolean  // default true 
355-         excludePatterns ?: string [ ]  // default defaultExcludePatterns 
356-         filterFn ?: CollectFilesFilter 
357-     } 
358- ) : Promise < CollectFilesResultItem [ ] >  { 
359-     const  storage : Awaited < CollectFilesResultItem [ ] >  =  [ ] 
360- 
360+     options ?: ( CollectFilesOptions  &  {  includeContent : true  } )  |  Omit < CollectFilesOptions ,  'includeContent' > 
361+ ) : Promise < CollectFilesResultItem [ ] > 
362+ export  async  function  collectFiles ( 
363+     sourcePaths : string [ ] , 
364+     workspaceFolders : CurrentWsFolders , 
365+     options ?: CollectFilesOptions  &  {  includeContent : false  } 
366+ ) : Promise < Omit < CollectFilesResultItem ,  'fileContent' > [ ] > 
367+ export  async  function  collectFiles ( 
368+     sourcePaths : string [ ] , 
369+     workspaceFolders : CurrentWsFolders , 
370+     options ?: CollectFilesOptions 
371+ )  { 
361372    const  workspaceFoldersMapping  =  getWorkspaceFoldersByPrefixes ( workspaceFolders ) 
362373    const  workspaceToPrefix  =  new  Map < vscode . WorkspaceFolder ,  string > ( 
363374        workspaceFoldersMapping  ===  undefined 
364375            ? [ [ workspaceFolders [ 0 ] ,  '' ] ] 
365376            : Object . entries ( workspaceFoldersMapping ) . map ( ( value )  =>  [ value [ 1 ] ,  value [ 0 ] ] ) 
366377    ) 
367-     const  prefixWithFolderPrefix  =  ( folder : vscode . WorkspaceFolder ,  path : string )  =>  { 
368-         const  prefix  =  workspaceToPrefix . get ( folder ) 
369-         /** 
370-          * collects all files that are marked as source 
371-          * @param  sourcePaths the paths where collection starts 
372-          * @param  workspaceFolders the current workspace folders opened 
373-          * @param  respectGitIgnore whether to respect gitignore file 
374-          * @returns  all matched files 
375-          */ 
376-         if  ( prefix  ===  undefined )  { 
377-             throw  new  ToolkitError ( `Failed to find prefix for workspace folder ${ folder . name }  ` ) 
378-         } 
379-         return  prefix  ===  ''  ? path  : `${ prefix }  /${ path }  ` 
380-     } 
381- 
382-     let  totalSizeBytes  =  0 
383378
379+     const  includeContent  =  options ?. includeContent  ??  true 
380+     const  maxFileSizeBytes  =  options ?. maxFileSizeBytes  ??  1024  *  1024  *  10 
384381    const  excludeByGitIgnore  =  options ?. excludeByGitIgnore  ??  true 
382+     const  failOnLimit  =  options ?. failOnLimit  ??  true 
385383    const  inputExcludePatterns  =  options ?. excludePatterns  ??  defaultExcludePatterns 
386-     const  maxSizeBytes  =  options ?. maxSizeBytes  ??  maxRepoSizeBytes 
384+     const  maxSizeBytes  =  options ?. maxTotalSizeBytes  ??  maxRepoSizeBytes 
387385
388386    const  excludePatterns  =  [ ...getGlobalExcludePatterns ( ) ] 
389387    if  ( inputExcludePatterns . length )  { 
390388        excludePatterns . push ( ...inputExcludePatterns ) 
391389    } 
392-     const  excludePatternFilter  =  excludePatternsAsString ( excludePatterns ) 
393390
391+     let  totalSizeBytes  =  0 
392+     const  storage  =  [ ] 
393+     const  excludePatternFilter  =  excludePatternsAsString ( excludePatterns ) 
394394    for  ( const  rootPath  of  sourcePaths )  { 
395395        const  allFiles  =  await  vscode . workspace . findFiles ( 
396396            new  vscode . RelativePattern ( rootPath ,  '**' ) , 
@@ -410,31 +410,56 @@ export async function collectFiles(
410410            } 
411411
412412            const  fileStat  =  await  fs . stat ( file ) 
413-             if  ( totalSizeBytes  +  fileStat . size  >  maxSizeBytes )  { 
413+             if  ( failOnLimit   &&   totalSizeBytes  +  fileStat . size  >  maxSizeBytes )  { 
414414                throw  new  ToolkitError ( 
415415                    'The project you have selected for source code is too large to use as context. Please select a different folder to use' , 
416416                    {  code : 'ContentLengthError'  } 
417417                ) 
418418            } 
419419
420-             const  fileContent  =  await  readFile ( file ) 
421- 
422-             if  ( fileContent  ===  undefined )  { 
420+             if  ( fileStat . size  >  maxFileSizeBytes )  { 
423421                continue 
424422            } 
425423
426-             // Now that we've read the file, increase our usage 
427-             totalSizeBytes  +=  fileStat . size 
428-             storage . push ( { 
424+             const  result  =  { 
429425                workspaceFolder : relativePath . workspaceFolder , 
430426                relativeFilePath : relativePath . relativePath , 
431427                fileUri : file , 
432-                 fileContent :  fileContent , 
428+                 fileSizeBytes :  fileStat . size , 
433429                zipFilePath : prefixWithFolderPrefix ( relativePath . workspaceFolder ,  relativePath . relativePath ) , 
434-             } ) 
430+             } 
431+             if  ( includeContent )  { 
432+                 const  content  =  await  readFile ( file ) 
433+                 if  ( content  ===  undefined )  { 
434+                     continue 
435+                 } 
436+                 totalSizeBytes  +=  fileStat . size 
437+                 storage . push ( { 
438+                     ...result , 
439+                     fileContent : content , 
440+                 } ) 
441+             }  else  { 
442+                 totalSizeBytes  +=  fileStat . size 
443+                 storage . push ( result ) 
444+             } 
435445        } 
436446    } 
437447    return  storage 
448+ 
449+     function  prefixWithFolderPrefix ( folder : vscode . WorkspaceFolder ,  path : string )  { 
450+         const  prefix  =  workspaceToPrefix . get ( folder ) 
451+         /** 
452+          * collects all files that are marked as source 
453+          * @param  sourcePaths the paths where collection starts 
454+          * @param  workspaceFolders the current workspace folders opened 
455+          * @param  respectGitIgnore whether to respect gitignore file 
456+          * @returns  all matched files 
457+          */ 
458+         if  ( prefix  ===  undefined )  { 
459+             throw  new  ToolkitError ( `Failed to find prefix for workspace folder ${ folder . name }  ` ) 
460+         } 
461+         return  prefix  ===  ''  ? path  : `${ prefix }  /${ path }  ` 
462+     } 
438463} 
439464
440465const  readFile  =  async  ( file : vscode . Uri )  =>  { 
@@ -576,7 +601,7 @@ export function getWorkspaceFoldersByPrefixes(
576601 * 2. Must not be auto generated code 
577602 * 3. Must not be within gitignore 
578603 * 4. Ranked by priority. 
579-  * 5. Select files within maxSize  limit. 
604+  * 5. Select files within maxFileSize  limit. 
580605 * This function does not read the actual file content or compress the files into a zip. 
581606 * TODO: Move this to LSP 
582607 * @param  sourcePaths the paths where collection starts 
@@ -590,65 +615,20 @@ export async function collectFilesForIndex(
590615    respectGitIgnore : boolean  =  true , 
591616    maxSize  =  250  *  1024  *  1024  // 250 MB, 
592617    // make this configurable, so we can test it 
593- ) : Promise < 
594-     { 
595-         workspaceFolder : vscode . WorkspaceFolder 
596-         relativeFilePath : string 
597-         fileUri : vscode . Uri 
598-         fileSizeBytes : number 
599-     } [ ] 
600- >  { 
601-     const  storage : Awaited < ReturnType < typeof  collectFilesForIndex > >  =  [ ] 
602- 
603-     const  isLanguageSupported  =  ( filename : string )  =>  { 
604-         const  k  = 
605-             / \. ( j s | t s | j a v a | p y | r b | c p p | t s x | j s x | c c | c | c s | v b | p l | r | m | h s | m t s | m j s | h | c l j | d a r t | g r o o v y | l u a | r b | j l | i p y n b | h t m l | j s o n | c s s | m d | p h p | s w i f t | r s | s c a l a | y a m l | t f | s q l | s h | g o | y m l | k t | s m i t h y | c o n f i g | k t s | g r a d l e | c f g | x m l | v u e ) $ / i
606-         return  k . test ( filename )  ||  filename . endsWith ( 'Config' ) 
607-     } 
608- 
609-     const  isBuildOrBin  =  ( filePath : string )  =>  { 
610-         const  k  =  / [ / \\ ] ( b i n | b u i l d | n o d e _ m o d u l e s | e n v | \. i d e a | \. v e n v | v e n v ) [ / \\ ] / i
611-         return  k . test ( filePath ) 
612-     } 
613- 
614-     let  totalSizeBytes  =  0 
615-     for  ( const  rootPath  of  sourcePaths )  { 
616-         const  allFiles  =  await  vscode . workspace . findFiles ( 
617-             new  vscode . RelativePattern ( rootPath ,  '**' ) , 
618-             getExcludePattern ( ) 
619-         ) 
620-         const  files  =  respectGitIgnore  ? await  filterOutGitignoredFiles ( rootPath ,  allFiles )  : allFiles 
621- 
622-         for  ( const  file  of  files )  { 
623-             if  ( ! isLanguageSupported ( file . fsPath ) )  { 
624-                 continue 
625-             } 
626-             if  ( isBuildOrBin ( file . fsPath ) )  { 
627-                 continue 
628-             } 
629-             const  relativePath  =  getWorkspaceRelativePath ( file . fsPath ,  {  workspaceFolders } ) 
630-             if  ( ! relativePath )  { 
631-                 continue 
632-             } 
633- 
634-             const  fileStat  =  await  fs . stat ( file ) 
635-             // ignore single file over 10 MB 
636-             if  ( fileStat . size  >  10  *  1024  *  1024 )  { 
637-                 continue 
638-             } 
639-             storage . push ( { 
640-                 workspaceFolder : relativePath . workspaceFolder , 
641-                 relativeFilePath : relativePath . relativePath , 
642-                 fileUri : file , 
643-                 fileSizeBytes : fileStat . size , 
644-             } ) 
645-         } 
646-     } 
618+ )  { 
619+     const  storage  =  await  collectFiles ( sourcePaths ,  workspaceFolders ,  { 
620+         maxFileSizeBytes : 10  *  1024  *  1024 , 
621+         includeContent : false , 
622+         failOnLimit : false , 
623+         excludeByGitIgnore : respectGitIgnore , 
624+         filterFn : ( rp )  =>  ! isLanguageSupported ( rp )  ||  isBuildOrBin ( rp ) , 
625+     } ) 
647626    // prioritize upper level files 
648627    storage . sort ( ( a ,  b )  =>  a . fileUri . fsPath . length  -  b . fileUri . fsPath . length ) 
649628
650629    const  maxSizeBytes  =  Math . min ( maxSize ,  os . freemem ( )  /  2 ) 
651630
631+     let  totalSizeBytes  =  0 
652632    let  i  =  0 
653633    for  ( i  =  0 ;  i  <  storage . length ;  i  +=  1 )  { 
654634        totalSizeBytes  +=  storage [ i ] . fileSizeBytes 
@@ -658,6 +638,17 @@ export async function collectFilesForIndex(
658638    } 
659639    // pick top 100k files below size limit 
660640    return  storage . slice ( 0 ,  Math . min ( 100000 ,  i ) ) 
641+ 
642+     function  isLanguageSupported ( filename : string )  { 
643+         const  k  = 
644+             / \. ( j s | t s | j a v a | p y | r b | c p p | t s x | j s x | c c | c | c s | v b | p l | r | m | h s | m t s | m j s | h | c l j | d a r t | g r o o v y | l u a | r b | j l | i p y n b | h t m l | j s o n | c s s | m d | p h p | s w i f t | r s | s c a l a | y a m l | t f | s q l | s h | g o | y m l | k t | s m i t h y | c o n f i g | k t s | g r a d l e | c f g | x m l | v u e ) $ / i
645+         return  k . test ( filename )  ||  filename . endsWith ( 'Config' ) 
646+     } 
647+ 
648+     function  isBuildOrBin ( filePath : string )  { 
649+         const  k  =  / [ / \\ ] ( b i n | b u i l d | n o d e _ m o d u l e s | e n v | \. i d e a | \. v e n v | v e n v ) [ / \\ ] / i
650+         return  k . test ( filePath ) 
651+     } 
661652} 
662653
663654/** 
0 commit comments