Skip to content

Commit 5dac164

Browse files
authored
Merge pull request aws#7042 from jguoamz/listDirFix
fix(chat): Ignore directories and files that are not relevant to the code analysis in listDirectory
2 parents 741c2c4 + 88439de commit 5dac164

File tree

5 files changed

+101
-10
lines changed

5 files changed

+101
-10
lines changed

packages/core/src/codewhispererChat/constants.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,14 @@ export const defaultContextLengths: ContextLengths = {
5555
}
5656

5757
export const defaultStreamingResponseTimeoutInMs = 180_000
58+
59+
/**
 * Entry names that directory listings should leave out.
 *
 * The list is consumed by `shouldIgnoreDirAndFile` in `shared/utilities/workspaceUtils.ts`,
 * which compares each pattern against a single entry name (not a full path). That function
 * understands exact names, directory-only patterns written with a trailing `/`, and `*`
 * wildcards; every entry below is currently an exact name.
 */
export const ignoredDirectoriesAndFiles = [
60+
// Dependency directories
61+
'node_modules',
62+
// Build outputs
63+
'dist',
64+
'build',
65+
'out',
66+
// OS-specific files
67+
'.DS_Store',
68+
]

packages/core/src/codewhispererChat/tools/tool_index.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
},
8484
"listDirectory": {
8585
"name": "listDirectory",
86-
"description": "List the contents of a directory and its subdirectories.\n * Use this tool for discovery, before using more targeted tools like fsRead.\n *Useful to try to understand the file structure before diving deeper into specific files.\n *Can be used to explore the codebase.\n *Results clearly distinguish between files, directories or symlinks with [FILE], [DIR] and [LINK] prefixes.",
86+
"description": "List the contents of a directory and its subdirectories, it will filter out build outputs such as `build/`, `out/` and `dist` and dependency directory such as `node_modules/`.\n * Use this tool for discovery, before using more targeted tools like fsRead.\n *Useful to try to understand the file structure before diving deeper into specific files.\n *Can be used to explore the codebase.\n *Results clearly distinguish between files, directories or symlinks with [F], [D] and [L] prefixes.",
8787
"inputSchema": {
8888
"type": "object",
8989
"properties": {

packages/core/src/shared/utilities/workspaceUtils.ts

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import fs from '../fs/fs'
2020
import { ChildProcess } from './processUtils'
2121
import { isWin } from '../vscode/env'
2222
import { maxRepoSizeBytes } from '../../amazonqFeatureDev/constants'
23+
import { ignoredDirectoriesAndFiles } from '../../codewhispererChat/constants'
2324

2425
type GitIgnoreRelativeAcceptor = {
2526
folderPath: string
@@ -673,14 +674,14 @@ export async function findStringInDirectory(searchStr: string, dirPath: string)
673674
}
674675

675676
/**
676-
* Returns a prefix for a directory ('[DIR]'), symlink ('[LINK]'), or file ('[FILE]').
677+
* Returns a listing line: a type prefix ('[D]' for a directory, '[L]' for a symlink, '[F]' for anything else) followed by `fullPath`. `name` is currently unused.
677678
*/
678679
export function formatListing(name: string, fileType: vscode.FileType, fullPath: string): string {
679-
let typeChar = '[FILE]'
680+
let typeChar = '[F]'
680681
if (fileType === vscode.FileType.Directory) {
681-
typeChar = '[DIR]'
682+
typeChar = '[D]'
682683
} else if (fileType === vscode.FileType.SymbolicLink) {
683-
typeChar = '[LINK]'
684+
typeChar = '[L]'
684685
}
685686
return `${typeChar} ${fullPath}`
686687
}
@@ -689,6 +690,7 @@ export function formatListing(name: string, fileType: vscode.FileType, fullPath:
689690
* Recursively lists directories using a BFS approach, returning lines like:
690691
* [D] /absolute/path/to/folder
691692
* [F] /absolute/path/to/file.txt
693+
* Entries whose names match `ignoredDirectoriesAndFiles` (for example `node_modules`, build outputs and `.DS_Store`) are skipped.
692694
*
693695
* You can either pass a custom callback or rely on the default `formatListing`.
694696
*
@@ -727,6 +729,10 @@ export async function readDirectoryRecursively(
727729
}
728730

729731
for (const [name, fileType] of entries) {
732+
if (shouldIgnoreDirAndFile(name, fileType)) {
733+
logger.debug(`Ignoring: ${name} in ${uri.fsPath}`)
734+
continue
735+
}
730736
const childUri = vscode.Uri.joinPath(uri, name)
731737
results.push(formatter(name, fileType, childUri.fsPath))
732738

@@ -739,6 +745,54 @@ export async function readDirectoryRecursively(
739745
return results
740746
}
741747

748+
/**
 * Decides whether a directory entry should be left out of a listing.
 *
 * The entry's bare name is checked against every pattern in `ignoredDirectoriesAndFiles`.
 * Three pattern forms are understood:
 *  - an exact name, e.g. `node_modules`;
 *  - a directory-only pattern ending in `/`, e.g. `target/` or `*.egg-info/`, which
 *    applies only when `fileType` is `vscode.FileType.Directory`;
 *  - a pattern containing `*`, where each `*` stands for any run of characters
 *    (including none), e.g. `*.class`, `npm-debug.log*`, `*.env.*`.
 *
 * @param name Entry name without any parent path.
 * @param fileType Type of the entry as reported by the file system API.
 * @returns `true` when the entry matches at least one ignore pattern.
 */
export function shouldIgnoreDirAndFile(name: string, fileType: vscode.FileType): boolean {
    return ignoredDirectoriesAndFiles.some((pattern) => {
        // A literal match always wins, whatever special characters the pattern contains.
        if (name === pattern) {
            return true
        }
        // Directory-only pattern: drop the trailing slash and match the remainder, so
        // wildcard directory patterns work too. Non-directories never match these.
        if (pattern.endsWith('/')) {
            return fileType === vscode.FileType.Directory && matchesWildcardPattern(name, pattern.slice(0, -1))
        }
        return pattern.includes('*') && matchesWildcardPattern(name, pattern)
    })
}

/**
 * Matches `name` against `pattern`, where `*` is the only special character.
 *
 * Implemented with plain string searches instead of a generated regular expression, so
 * characters such as `.`, `+`, `(` or `[` in a pattern are always taken literally and no
 * RegExp has to be compiled per call.
 */
function matchesWildcardPattern(name: string, pattern: string): boolean {
    const segments = pattern.split('*')
    if (segments.length === 1) {
        return name === pattern
    }

    const head = segments[0] ?? ''
    const tail = segments[segments.length - 1] ?? ''
    // The fixed head and tail must both fit without overlapping each other.
    if (name.length < head.length + tail.length || !name.startsWith(head) || !name.endsWith(tail)) {
        return false
    }

    // The remaining literal segments must appear in order between head and tail.
    // Taking the leftmost occurrence each time is sufficient for '*'-only globs.
    const limit = name.length - tail.length
    let cursor = head.length
    for (const segment of segments.slice(1, -1)) {
        const found = name.indexOf(segment, cursor)
        if (found === -1 || found + segment.length > limit) {
            return false
        }
        cursor = found + segment.length
    }
    return true
}
795+
742796
export function getWorkspacePaths() {
743797
const workspaceFolders = vscode.workspace.workspaceFolders
744798
return workspaceFolders?.map((folder) => folder.uri.fsPath) ?? []

packages/core/src/test/codewhispererChat/tools/listDirectory.test.ts

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ describe('ListDirectory Tool', () => {
4343
const result = await listDirectory.invoke(process.stdout)
4444

4545
const lines = result.output.content.split('\n')
46-
const hasFileA = lines.some((line: string | string[]) => line.includes('[FILE] ') && line.includes('fileA.txt'))
46+
const hasFileA = lines.some((line: string | string[]) => line.includes('[F] ') && line.includes('fileA.txt'))
4747
const hasSubfolder = lines.some(
48-
(line: string | string[]) => line.includes('[DIR] ') && line.includes('subfolder')
48+
(line: string | string[]) => line.includes('[D] ') && line.includes('subfolder')
4949
)
5050

5151
assert.ok(hasFileA, 'Should list fileA.txt in the directory output')
@@ -62,17 +62,35 @@ describe('ListDirectory Tool', () => {
6262
const result = await listDirectory.invoke(process.stdout)
6363

6464
const lines = result.output.content.split('\n')
65-
const hasFileA = lines.some((line: string | string[]) => line.includes('[FILE] ') && line.includes('fileA.txt'))
65+
const hasFileA = lines.some((line: string | string[]) => line.includes('[F] ') && line.includes('fileA.txt'))
6666
const hasSubfolder = lines.some(
67-
(line: string | string[]) => line.includes('[DIR] ') && line.includes('subfolder')
67+
(line: string | string[]) => line.includes('[D] ') && line.includes('subfolder')
6868
)
69-
const hasFileB = lines.some((line: string | string[]) => line.includes('[FILE] ') && line.includes('fileB.md'))
69+
const hasFileB = lines.some((line: string | string[]) => line.includes('[F] ') && line.includes('fileB.md'))
7070

7171
assert.ok(hasFileA, 'Should list fileA.txt in the directory output')
7272
assert.ok(hasSubfolder, 'Should list the subfolder in the directory output')
7373
assert.ok(hasFileB, 'Should list fileB.md in the subfolder in the directory output')
7474
})
7575

76+
it('lists directory contents with ignored pattern', async () => {
77+
await testFolder.mkdir('node_modules')
78+
await testFolder.write(path.join('node_modules', 'fileC.md'), '# fileC')
79+
80+
const listDirectory = new ListDirectory({ path: testFolder.path })
81+
await listDirectory.validate()
82+
const result = await listDirectory.invoke(process.stdout)
83+
84+
const lines = result.output.content.split('\n')
85+
const hasNodeModules = lines.some(
86+
(line: string | string[]) => line.includes('[D] ') && line.includes('node_modules')
87+
)
88+
const hasFileC = lines.some((line: string | string[]) => line.includes('[F] ') && line.includes('fileC.md'))
89+
90+
assert.ok(!hasNodeModules, 'Should not list node_modules in the directory output')
91+
assert.ok(!hasFileC, 'Should not list fileC.md under node_modules in the directory output')
92+
})
93+
7694
it('throws error if path does not exist', async () => {
7795
const missingPath = path.join(testFolder.path, 'no_such_file.txt')
7896
const listDirectory = new ListDirectory({ path: missingPath, maxDepth: 0 })

packages/core/src/testInteg/shared/utilities/workspaceUtils.test.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
findStringInDirectory,
1515
getWorkspaceFoldersByPrefixes,
1616
getWorkspaceRelativePath,
17+
shouldIgnoreDirAndFile,
1718
} from '../../../shared/utilities/workspaceUtils'
1819
import { getTestWorkspaceFolder } from '../../integrationTestsUtilities'
1920
import globals from '../../../shared/extensionGlobals'
@@ -591,6 +592,13 @@ describe('workspaceUtils', () => {
591592
})
592593
})
593594

595+
describe('shouldIgnoreDirAndFile', function () {
596+
it('handles exact matches', function () {
597+
assert.strictEqual(shouldIgnoreDirAndFile('node_modules', vscode.FileType.Directory), true)
598+
assert.strictEqual(shouldIgnoreDirAndFile('random_file.txt', vscode.FileType.File), false)
599+
})
600+
})
601+
594602
describe('findStringInDirectory', function () {
595603
it('prints the line with the detected string to stdout', async () => {
596604
const fileAmount = 1

0 commit comments

Comments
 (0)