Skip to content

Commit aa8593c

Browse files
authored
fix(amazonq): incorrect project scan size limit #4924
Problem: the `collectFiles` method uses a different max payload size value than security scans. Solution: (1) pass the desired max value to `collectFiles`; (2) use 500 MB as the max size for project scans.
1 parent 2d22d9d commit aa8593c

File tree

9 files changed

+590
-579
lines changed

9 files changed

+590
-579
lines changed

packages/core/src/amazonqFeatureDev/controllers/chat/controller.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ import { submitFeedback } from '../../../feedback/vue/submitFeedback'
3232
import { placeholder } from '../../../shared/vscode/commands2'
3333
import { EditorContentController } from '../../../amazonq/commons/controllers/contentController'
3434
import { openUrl } from '../../../shared/utilities/vsCodeUtils'
35-
import { getPathsFromZipFilePath, getWorkspaceFoldersByPrefixes } from '../../util/files'
35+
import { getPathsFromZipFilePath } from '../../util/files'
3636
import { examples, newTaskChanges, approachCreation, sessionClosed, updateCode } from '../../userFacingText'
37+
import { getWorkspaceFoldersByPrefixes } from '../../../shared/utilities/workspaceUtils'
3738

3839
export interface ChatControllerEventEmitters {
3940
readonly processHumanChatMessage: EventEmitter<any>

packages/core/src/amazonqFeatureDev/session/sessionState.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@ import {
2626
SessionStateInteraction,
2727
SessionStatePhase,
2828
} from '../types'
29-
import { collectFiles, getWorkspaceFoldersByPrefixes, prepareRepoData } from '../util/files'
29+
import { prepareRepoData } from '../util/files'
3030
import { TelemetryHelper } from '../util/telemetryHelper'
3131
import { uploadCode } from '../util/upload'
3232
import { CodeReference } from '../../amazonq/webview/ui/connector'
3333
import { isPresent } from '../../shared/utilities/collectionUtils'
3434
import { encodeHTML } from '../../shared/utilities/textUtilities'
3535
import { AuthUtil } from '../../codewhisperer/util/authUtil'
3636
import { randomUUID } from '../../common/crypto'
37+
import { collectFiles, getWorkspaceFoldersByPrefixes } from '../../shared/utilities/workspaceUtils'
3738

3839
export class ConversationNotStartedState implements Omit<SessionState, 'uploadId'> {
3940
public tokenSource: vscode.CancellationTokenSource

packages/core/src/amazonqFeatureDev/util/files.ts

Lines changed: 1 addition & 247 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@
55

66
import * as vscode from 'vscode'
77
import * as path from 'path'
8-
import { SystemUtilities } from '../../shared/systemUtilities'
9-
import { getGlobDirExcludedPatterns } from '../../shared/fs/watchedFiles'
10-
import { getWorkspaceRelativePath } from '../../shared/utilities/workspaceUtils'
11-
import { Uri } from 'vscode'
12-
import { GitIgnoreFilter } from './gitignore'
8+
import { collectFiles } from '../../shared/utilities/workspaceUtils'
139

1410
import AdmZip from 'adm-zip'
1511
import { ContentLengthError, PrepareRepoFailedError } from '../errors'
@@ -20,131 +16,6 @@ import { CurrentWsFolders } from '../types'
2016
import { ToolkitError } from '../../shared/errors'
2117
import { AmazonqCreateUpload, Metric } from '../../shared/telemetry/telemetry'
2218
import { TelemetryHelper } from './telemetryHelper'
23-
import { sanitizeFilename } from '../../shared/utilities/textUtilities'
24-
import { maxRepoSizeBytes } from '../constants'
25-
26-
export function getExcludePattern(additionalPatterns: string[] = []) {
27-
const globAlwaysExcludedDirs = getGlobDirExcludedPatterns().map(pattern => `**/${pattern}/*`)
28-
const extraPatterns = [
29-
'**/package-lock.json',
30-
'**/yarn.lock',
31-
'**/*.zip',
32-
'**/*.bin',
33-
'**/*.png',
34-
'**/*.jpg',
35-
'**/*.svg',
36-
'**/*.pyc',
37-
'**/license.txt',
38-
'**/License.txt',
39-
'**/LICENSE.txt',
40-
'**/license.md',
41-
'**/License.md',
42-
'**/LICENSE.md',
43-
]
44-
const allPatterns = [...globAlwaysExcludedDirs, ...extraPatterns, ...additionalPatterns]
45-
return `{${allPatterns.join(',')}}`
46-
}
47-
48-
/**
49-
* @param rootPath root folder to look for .gitignore files
50-
* @returns list of glob patterns extracted from .gitignore
51-
* These patterns are compatible with vscode exclude patterns
52-
*/
53-
async function filterOutGitignoredFiles(rootPath: string, files: Uri[]): Promise<Uri[]> {
54-
const gitIgnoreFiles = await vscode.workspace.findFiles(
55-
new vscode.RelativePattern(rootPath, '**/.gitignore'),
56-
getExcludePattern()
57-
)
58-
const gitIgnoreFilter = await GitIgnoreFilter.build(gitIgnoreFiles)
59-
return gitIgnoreFilter.filterFiles(files)
60-
}
61-
62-
/**
63-
* collects all files that are marked as source
64-
* @param sourcePaths the paths where collection starts
65-
* @param workspaceFolders the current workspace folders opened
66-
* @param respectGitIgnore whether to respect gitignore file
67-
* @returns all matched files
68-
*/
69-
export async function collectFiles(
70-
sourcePaths: string[],
71-
workspaceFolders: CurrentWsFolders,
72-
respectGitIgnore: boolean = true
73-
): Promise<
74-
{
75-
workspaceFolder: vscode.WorkspaceFolder
76-
relativeFilePath: string
77-
fileUri: vscode.Uri
78-
fileContent: string
79-
zipFilePath: string
80-
}[]
81-
> {
82-
const storage: Awaited<ReturnType<typeof collectFiles>> = []
83-
84-
const workspaceFoldersMapping = getWorkspaceFoldersByPrefixes(workspaceFolders)
85-
const workspaceToPrefix = new Map<vscode.WorkspaceFolder, string>(
86-
workspaceFoldersMapping === undefined
87-
? [[workspaceFolders[0], '']]
88-
: Object.entries(workspaceFoldersMapping).map(value => [value[1], value[0]])
89-
)
90-
const prefixWithFolderPrefix = (folder: vscode.WorkspaceFolder, path: string) => {
91-
const prefix = workspaceToPrefix.get(folder)
92-
if (prefix === undefined) {
93-
throw new ToolkitError(`Failed to find prefix for workspace folder ${folder.name}`)
94-
}
95-
return prefix === '' ? path : `${prefix}/${path}`
96-
}
97-
98-
let totalSizeBytes = 0
99-
for (const rootPath of sourcePaths) {
100-
const allFiles = await vscode.workspace.findFiles(
101-
new vscode.RelativePattern(rootPath, '**'),
102-
getExcludePattern()
103-
)
104-
const files = respectGitIgnore ? await filterOutGitignoredFiles(rootPath, allFiles) : allFiles
105-
106-
for (const file of files) {
107-
const relativePath = getWorkspaceRelativePath(file.fsPath, { workspaceFolders })
108-
if (!relativePath) {
109-
continue
110-
}
111-
112-
const fileStat = await vscode.workspace.fs.stat(file)
113-
if (totalSizeBytes + fileStat.size > maxRepoSizeBytes) {
114-
throw new ContentLengthError()
115-
}
116-
117-
const fileContent = await readFile(file)
118-
if (fileContent === undefined) {
119-
continue
120-
}
121-
122-
// Now that we've read the file, increase our usage
123-
totalSizeBytes += fileStat.size
124-
storage.push({
125-
workspaceFolder: relativePath.workspaceFolder,
126-
relativeFilePath: relativePath.relativePath,
127-
fileUri: file,
128-
fileContent: fileContent,
129-
zipFilePath: prefixWithFolderPrefix(relativePath.workspaceFolder, relativePath.relativePath),
130-
})
131-
}
132-
}
133-
return storage
134-
}
135-
136-
const readFile = async (file: vscode.Uri) => {
137-
try {
138-
const fileContent = await SystemUtilities.readFile(file, new TextDecoder('utf8', { fatal: false }))
139-
return fileContent
140-
} catch (error) {
141-
getLogger().debug(
142-
`featureDev: Failed to read file ${file.fsPath} when collecting repository. Skipping the file`
143-
)
144-
}
145-
146-
return undefined
147-
}
14819

14920
const getSha256 = (file: Buffer) => createHash('sha256').update(file).digest('base64')
15021

@@ -191,123 +62,6 @@ export async function prepareRepoData(
19162
}
19263
}
19364

194-
const workspaceFolderPrefixGuards = {
195-
/**
196-
* the maximum number of subfolders the method below takes into account when calculating a prefix
197-
*/
198-
maximumFolderDepthConsidered: 500,
199-
/**
200-
* the maximum suffix that can be added to a folder prefix in case of full subfolder path matches
201-
*/
202-
maximumFoldersWithMatchingSubfolders: 10_000,
203-
}
204-
205-
/**
206-
* tries to determine the possible prefixes we will use for a given workspace folder in the zip file
207-
* We want to keep the folder names in the prefix, since they might convey useful information, for example
208-
* If both folders are just called cdk (no name specified for the ws folder), adding a prefix of cdk1 and cdk2 is much less context, than having app_cdk and canaries_cdk
209-
*
210-
* Input:
211-
* - packages/app/cdk
212-
* - packages/canaries/cdk
213-
* Output:
214-
* - {'app_cdk': packages/app/cdk, 'canaries_cdk': packages/canaries/cdk}
215-
*
216-
* @returns an object where workspace folders have a prefix, or undefined for single root workspace, as there is no mapping needed there
217-
*/
218-
export function getWorkspaceFoldersByPrefixes(
219-
folders: CurrentWsFolders
220-
): { [prefix: string]: vscode.WorkspaceFolder } | undefined {
221-
if (folders.length <= 1) {
222-
return undefined
223-
}
224-
let remainingWorkspaceFoldersToMap = folders.map(f => ({
225-
folder: f,
226-
preferredPrefixQueue: f.uri.fsPath
227-
.split(path.sep)
228-
.reverse()
229-
.slice(0, workspaceFolderPrefixGuards.maximumFolderDepthConsidered)
230-
.reduce(
231-
(candidates, subDir) => {
232-
candidates.push(sanitizeFilename(path.join(subDir, candidates[candidates.length - 1])))
233-
return candidates
234-
},
235-
[f.name]
236-
)
237-
.reverse(),
238-
}))
239-
const results: ReturnType<typeof getWorkspaceFoldersByPrefixes> = {}
240-
241-
for (
242-
let addParentFolderCount = 0;
243-
remainingWorkspaceFoldersToMap.length > 0 &&
244-
addParentFolderCount < workspaceFolderPrefixGuards.maximumFolderDepthConsidered;
245-
addParentFolderCount++
246-
) {
247-
const workspacesByPrefixes = remainingWorkspaceFoldersToMap.reduce((acc, wsFolder) => {
248-
const prefix = wsFolder.preferredPrefixQueue.pop()
249-
// this should never happen, as last candidates should be handled below, and the array starts non empty
250-
if (prefix === undefined) {
251-
throw new ToolkitError(
252-
`Encountered a folder with invalid prefix candidates (workspace folder ${wsFolder.folder.name})`
253-
)
254-
}
255-
acc[prefix] = acc[prefix] ?? []
256-
acc[prefix].push(wsFolder)
257-
return acc
258-
}, {} as { [key: string]: (typeof remainingWorkspaceFoldersToMap)[0][] })
259-
remainingWorkspaceFoldersToMap = []
260-
for (const [prefix, folders] of Object.entries(workspacesByPrefixes)) {
261-
// if a folder has a unique prefix
262-
if (folders.length === 1 && results[prefix] === undefined) {
263-
results[prefix] = folders[0].folder
264-
continue
265-
}
266-
267-
// find the folders that do not have more parents
268-
const foldersToSuffix: typeof folders = []
269-
for (const folder of folders) {
270-
if (folder.preferredPrefixQueue.length > 0) {
271-
remainingWorkspaceFoldersToMap.push(folder)
272-
} else {
273-
foldersToSuffix.push(folder)
274-
}
275-
}
276-
// for these last resort folders, suffix them with an increasing number until unique
277-
if (foldersToSuffix.length === 1 && results[prefix] === undefined) {
278-
results[prefix] = foldersToSuffix[0].folder
279-
} else {
280-
let suffix = 1
281-
for (const folder of foldersToSuffix) {
282-
let newPrefix: string
283-
let safetyCounter = 0
284-
do {
285-
newPrefix = `${prefix}_${suffix}`
286-
suffix++
287-
safetyCounter++
288-
} while (
289-
results[newPrefix] !== undefined &&
290-
safetyCounter < workspaceFolderPrefixGuards.maximumFoldersWithMatchingSubfolders
291-
)
292-
if (safetyCounter >= workspaceFolderPrefixGuards.maximumFoldersWithMatchingSubfolders) {
293-
throw new ToolkitError(
294-
`Could not find a unique prefix for workspace folder ${folder.folder.name} in zip file.`
295-
)
296-
}
297-
results[newPrefix] = folder.folder
298-
}
299-
}
300-
}
301-
}
302-
if (remainingWorkspaceFoldersToMap.length > 0) {
303-
throw new ToolkitError(
304-
`Could not find a unique prefix for workspace folder ${remainingWorkspaceFoldersToMap[0].folder.name} in zip file.`
305-
)
306-
}
307-
308-
return results
309-
}
310-
31165
/**
31266
* gets the absolute path from a zip path
31367
* @param zipFilePath the path in the zip file

packages/core/src/amazonqFeatureDev/util/gitignore.ts

Lines changed: 0 additions & 59 deletions
This file was deleted.

packages/core/src/codewhisperer/models/constants.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ export const fileScanPayloadSizeLimitBytes = 200 * Math.pow(2, 10) // 200 KB
216216

217217
export const fileScanUploadIntent = 'AUTOMATIC_FILE_SECURITY_SCAN'
218218

219-
export const projectScanPayloadSizeLimitBytes = 5 * Math.pow(2, 30) // 5 GB
219+
export const projectScanPayloadSizeLimitBytes = 500 * Math.pow(2, 20) // 500 MB
220220

221221
export const projectScanUploadIntent = 'FULL_PROJECT_SECURITY_SCAN'
222222

packages/core/src/codewhisperer/util/zipUtil.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,10 @@ import { getLogger } from '../../shared/logger'
1010
import * as CodeWhispererConstants from '../models/constants'
1111
import { ToolkitError } from '../../shared/errors'
1212
import { fsCommon } from '../../srcShared/fs'
13-
import { collectFiles } from '../../amazonqFeatureDev/util/files'
1413
import { getLoggerForScope } from '../service/securityScanHandler'
1514
import { runtimeLanguageContext } from './runtimeLanguageContext'
1615
import { CodewhispererLanguage } from '../../shared/telemetry/telemetry.gen'
17-
import { CurrentWsFolders } from '../../amazonqFeatureDev/types'
16+
import { CurrentWsFolders, collectFiles } from '../../shared/utilities/workspaceUtils'
1817

1918
export interface ZipMetadata {
2019
rootDir: string
@@ -127,7 +126,12 @@ export class ZipUtil {
127126

128127
const projectPaths = this.getProjectPaths()
129128

130-
const files = await collectFiles(projectPaths, vscode.workspace.workspaceFolders as CurrentWsFolders)
129+
const files = await collectFiles(
130+
projectPaths,
131+
vscode.workspace.workspaceFolders as CurrentWsFolders,
132+
true,
133+
CodeWhispererConstants.projectScanPayloadSizeLimitBytes
134+
)
131135
const languageCount = new Map<CodewhispererLanguage, number>()
132136
for (const file of files) {
133137
const isFileOpenAndDirty = this.isFileOpenAndDirty(file.fileUri)

0 commit comments

Comments
 (0)