Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
bb6e979
refactor: rename parameter according to PR comment
Hweinstock Mar 19, 2025
b93286f
test: move testing file into same package
Hweinstock Mar 19, 2025
79d5375
refactor: break up and simplify components
Hweinstock Mar 19, 2025
7d73cd4
merge: resolve test file conflict
Hweinstock Mar 19, 2025
858468c
feat: introduce general project zipping interface
Hweinstock Mar 19, 2025
eb29e81
refactor: move zipUtil test files to core
Hweinstock Mar 19, 2025
60638ca
merge: add changes from zipUtil
Hweinstock Mar 20, 2025
1c4dffd
test: re-add accidentally deleted test file
Hweinstock Mar 20, 2025
d838201
refactor: inline processing functions
Hweinstock Mar 20, 2025
d60af22
fix: use updated content
Hweinstock Mar 20, 2025
605fc87
feat: check if file is binary when collecting
Hweinstock Mar 20, 2025
1f5c02e
feat: support unsaved changes
Hweinstock Mar 20, 2025
4174cb7
refactor: use data exported from collectFiles
Hweinstock Mar 21, 2025
1fc74f2
refactor: split into components feedadble to zipProjecttUtil
Hweinstock Mar 21, 2025
bf6a416
refactor: use general utility in zipUtil
Hweinstock Mar 21, 2025
acb7361
fix: avoid finalizing zip early
Hweinstock Mar 21, 2025
9f79ef3
fix: add option to include project name
Hweinstock Mar 21, 2025
1f1cf00
fix: allow option to disable posix path
Hweinstock Mar 21, 2025
0841c89
Merge branch 'master' into cleanUp/prepareRepoData
Hweinstock Mar 21, 2025
1845ac8
refactor: split into more components
Hweinstock Mar 21, 2025
d3b5133
refactor: add types for customizations
Hweinstock Mar 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 66 additions & 93 deletions packages/core/src/amazonq/util/files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import * as vscode from 'vscode'
import * as path from 'path'
import {
collectFiles,
CollectFilesFilter,
defaultExcludePatterns,
getWorkspaceFoldersByPrefixes,
Expand All @@ -16,7 +15,7 @@ import { PrepareRepoFailedError } from '../../amazonqFeatureDev/errors'
import { getLogger } from '../../shared/logger/logger'
import { maxFileSizeBytes } from '../../amazonqFeatureDev/limits'
import { CurrentWsFolders, DeletedFileInfo, NewFileInfo, NewFileZipContents } from '../../amazonqDoc/types'
import { ContentLengthError, hasCode, ToolkitError } from '../../shared/errors'
import { ContentLengthError, ToolkitError } from '../../shared/errors'
import { AmazonqCreateUpload, Span, telemetry as amznTelemetry, telemetry } from '../../shared/telemetry/telemetry'
import { maxRepoSizeBytes } from '../../amazonqFeatureDev/constants'
import { isCodeFile } from '../../shared/filetypes'
Expand All @@ -28,6 +27,7 @@ import { ZipStream } from '../../shared/utilities/zipStream'
import { isPresent } from '../../shared/utilities/collectionUtils'
import { AuthUtil } from '../../codewhisperer/util/authUtil'
import { TelemetryHelper } from '../util/telemetryHelper'
import { ZipExcluder, zipProject } from './zipProjectUtil'

export const SvgFileExtension = '.svg'

Expand All @@ -44,10 +44,47 @@ function isInfraDiagramFile(relativePath: string) {
)
}

/**
 * Chooses the exclude patterns and the optional path filter that are handed to file collection
 * when preparing a repo upload.
 *
 * @param useAutoBuildFeature When true, an empty pattern list and no filter are returned.
 * @param includeInfraDiagram When true (and auto-build is off), patterns ending in the svg extension are
 *     removed from the default exclude patterns and a filter callback is returned.
 * @returns An object with `excludePatterns` and `filterFn` (possibly `undefined`).
 */
function getFilterAndExcludePattern(useAutoBuildFeature: boolean, includeInfraDiagram: boolean) {
    // With useAutoBuildFeature on, only the gitignore rules are respected. This avoids dropping files that
    // are necessary for building the code (e.g. png files imported in js code).
    if (useAutoBuildFeature) {
        return { excludePatterns: [], filterFn: undefined }
    }

    // Without the infra diagram the default patterns are used untouched and no filter is needed.
    if (!includeInfraDiagram) {
        return { filterFn: undefined, excludePatterns: defaultExcludePatterns }
    }

    // ensure svg is not filtered out by files search
    const excludePatterns = defaultExcludePatterns.filter((pattern) => !pattern.endsWith(SvgFileExtension))

    // ensure only infra diagram is included from all svg files
    // (the callback is true for svg paths that are NOT the infra diagram; presumably `true` means
    // "drop this file" -- confirm against CollectFilesFilter)
    const filterFn: CollectFilesFilter | undefined = (relativePath: string) => {
        if (!relativePath.toLowerCase().endsWith(SvgFileExtension)) {
            return false
        }
        return !isInfraDiagramFile(relativePath)
    }

    return { filterFn, excludePatterns }
}

/**
 * Emits one `amazonq_bundleExtensionIgnored` telemetry event per entry of the given map.
 * Events are emitted sequentially, in the map's iteration order.
 *
 * @param ignoredExtensionMap Entries whose key is recorded as `filenameExt` and whose value is recorded as `count`.
 */
async function emitIgnoredExtensionTelemetry(ignoredExtensionMap: Map<string, number>) {
    for (const [filenameExt, count] of ignoredExtensionMap.entries()) {
        await amznTelemetry.amazonq_bundleExtensionIgnored.run(async (bundleSpan) => {
            const bundleEvent = { filenameExt, count }
            bundleSpan.record(bundleEvent)
        })
    }
}

export type PrepareRepoDataOptions = {
telemetry?: TelemetryHelper
zip?: ZipStream
isIncludeInfraDiagram?: boolean
includeInfraDiagram?: boolean
fileSizeByteLimit?: number // default to max
}

/**
Expand All @@ -61,68 +98,27 @@ export async function prepareRepoData(
) {
try {
const telemetry = options?.telemetry
const isIncludeInfraDiagram = options?.isIncludeInfraDiagram ?? false
const zip = options?.zip ?? new ZipStream()
const includeInfraDiagram = options?.includeInfraDiagram ?? false
const fileSizeByteLimit = options?.fileSizeByteLimit
? Math.min(options.fileSizeByteLimit, maxFileSizeBytes)
: maxFileSizeBytes

const autoBuildSetting = CodeWhispererSettings.instance.getAutoBuildSetting()
const useAutoBuildFeature = autoBuildSetting[repoRootPaths[0]] ?? false
const excludePatterns: string[] = []
let filterFn: CollectFilesFilter | undefined = undefined

// We only respect gitignore file rules if useAutoBuildFeature is on, this is to avoid dropping necessary files for building the code (e.g. png files imported in js code)
if (!useAutoBuildFeature) {
if (isIncludeInfraDiagram) {
// ensure svg is not filtered out by files search
excludePatterns.push(...defaultExcludePatterns.filter((p) => !p.endsWith(SvgFileExtension)))
// ensure only infra diagram is included from all svg files
filterFn = (relativePath: string) => {
if (!relativePath.toLowerCase().endsWith(SvgFileExtension)) {
return false
}
return !isInfraDiagramFile(relativePath)
}
} else {
excludePatterns.push(...defaultExcludePatterns)
}
}
const { excludePatterns, filterFn } = getFilterAndExcludePattern(useAutoBuildFeature, includeInfraDiagram)

const files = await collectFiles(repoRootPaths, workspaceFolders, {
maxTotalSizeBytes: maxRepoSizeBytes,
excludeByGitIgnore: true,
excludePatterns: excludePatterns,
filterFn: filterFn,
})

let totalBytes = 0
const ignoredExtensionMap = new Map<string, number>()
const addedFilePaths = new Set()

for (const file of files) {
if (addedFilePaths.has(file.zipFilePath)) {
continue
}
addedFilePaths.add(file.zipFilePath)

let fileSize
try {
fileSize = (await fs.stat(file.fileUri)).size
} catch (error) {
if (hasCode(error) && error.code === 'ENOENT') {
// No-op: Skip if file does not exist
continue
}
throw error
}
const isExcluded: ZipExcluder = (file) => {
const isCodeFile_ = isCodeFile(file.relativeFilePath)
const isDevFile = file.relativeFilePath === 'devfile.yaml'
const isInfraDiagramFileExt = isInfraDiagramFile(file.relativeFilePath)

let isExcludeFile = fileSize >= maxFileSizeBytes
let isExcludeFile = file.fileSizeBytes >= fileSizeByteLimit
// When useAutoBuildFeature is on, only respect the gitignore rules filtered earlier and apply the size limit
if (!isExcludeFile && !useAutoBuildFeature) {
isExcludeFile = isDevFile || (!isCodeFile_ && (!isIncludeInfraDiagram || !isInfraDiagramFileExt))
isExcludeFile = isDevFile || (!isCodeFile_ && (!includeInfraDiagram || !isInfraDiagramFileExt))
}

// Side-effect of isExcluded
if (isExcludeFile) {
if (!isCodeFile_) {
const re = /(?:\.([^.]+))?$/
Expand All @@ -134,55 +130,32 @@ export async function prepareRepoData(
ignoredExtensionMap.set(extension, (currentCount ?? 0) + 1)
}
}
continue
}

totalBytes += fileSize
// Paths in zip should be POSIX compliant regardless of OS
// Reference: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
const posixPath = file.zipFilePath.split(path.sep).join(path.posix.sep)

try {
zip.writeFile(file.fileUri.fsPath, posixPath)
} catch (error) {
if (error instanceof Error && error.message.includes('File not found')) {
// No-op: Skip if file was deleted or does not exist
// Reference: https://github.com/cthackers/adm-zip/blob/1cd32f7e0ad3c540142a76609bb538a5cda2292f/adm-zip.js#L296-L321
continue
}
throw error
}
return isExcludeFile
}

const iterator = ignoredExtensionMap.entries()
const zipResult = await zipProject(
repoRootPaths,
workspaceFolders,
{
maxTotalSizeBytes: maxRepoSizeBytes,
excludeByGitIgnore: true,
excludePatterns: excludePatterns,
filterFn: filterFn,
},
{ isExcluded },
{ zip: options?.zip ?? new ZipStream() }
)

for (let i = 0; i < ignoredExtensionMap.size; i++) {
const iteratorValue = iterator.next().value
if (iteratorValue) {
const [key, value] = iteratorValue
await amznTelemetry.amazonq_bundleExtensionIgnored.run(async (bundleSpan) => {
const event = {
filenameExt: key,
count: value,
}

bundleSpan.record(event)
})
}
}
await emitIgnoredExtensionTelemetry(ignoredExtensionMap)

if (telemetry) {
telemetry.setRepositorySize(totalBytes)
telemetry.setRepositorySize(zipResult.totalFileBytes)
}

span.record({ amazonqRepositorySize: totalBytes })
const zipResult = await zip.finalize()

const zipFileBuffer = zipResult.streamBuffer.getContents() || Buffer.from('')
return {
zipFileBuffer,
zipFileChecksum: zipResult.hash,
}
span.record({ amazonqRepositorySize: zipResult.totalFileBytes })
return zipResult
} catch (error) {
getLogger().debug(`Failed to prepare repo: ${error}`)
if (error instanceof ToolkitError && error.code === 'ContentLengthError') {
Expand Down
159 changes: 159 additions & 0 deletions packages/core/src/amazonq/util/zipProjectUtil.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*!
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
import vscode from 'vscode'
import path from 'path'
import {
collectFiles,
CollectFilesOptions,
CollectFilesResultItem,
getFileInfo,
} from '../../shared/utilities/workspaceUtils'
import { CurrentWsFolders } from '../commons/types'
import { ZipStream } from '../../shared/utilities/zipStream'

/** Result of zipping a project, as returned by `zipProject`. */
export interface ZippedWorkspaceResult {
// Contents of the finalized zip's stream buffer (an empty Buffer when there are no contents).
zipFileBuffer: Buffer
// The `hash` reported when the zip is finalized.
zipFileChecksum: string
// Sum of `fileSizeBytes` over every file that was actually added to the zip.
totalFileBytes: number
}

/** Outcome of `addFileToZip` when the file was written to the zip. */
interface ZipFileAddedResult {
result: 'added'
// The added file's `fileSizeBytes`.
addedBytes: number
}

/** Outcome of `addFileToZip` when the file was not written to the zip. */
interface ZipFileSkippedResult {
result: 'skipped'
// 'excluded': the `isExcluded` customization returned true.
// 'missing': writing threw an Error whose message contains 'File not found'.
reason: 'excluded' | 'missing'
}

/** Options controlling the entry paths used inside the zip. */
interface ZipProjectOptions {
// When set, entry paths are prefixed with the basename of the file's workspace folder.
includeProjectName?: boolean
// When set, the entry path is used as-is instead of converting `path.sep` to the POSIX separator.
nonPosixPath?: boolean
}

// Returns true when the file must not be added to the zip.
export type ZipExcluder = (file: Omit<CollectFilesResultItem, 'workspaceFolder'>) => boolean
// Returns an Error that aborts zipping (it is thrown by `addFileToZip`), or undefined to continue.
export type ZipErrorCheck = (file: Omit<CollectFilesResultItem, 'workspaceFolder'>) => Error | undefined
// Callback invoked (and awaited) after a file has been written to the zip.
export type ZipTracker = (file: Omit<CollectFilesResultItem, 'workspaceFolder'>) => Promise<void> | void

/**
 * Optional per-file hooks consumed by `addFileToZip`.
 * They run in this order: `isExcluded`, `checkForError`, then (after the write) `computeSideEffects`.
 */
interface ZipProjectCustomizations {
isExcluded?: ZipExcluder
checkForError?: ZipErrorCheck
computeSideEffects?: ZipTracker
}

/**
 * Writes a single collected file into `zip`, applying the optional customization hooks.
 *
 * Hook order: `isExcluded` -> `checkForError` -> write -> `computeSideEffects`.
 *
 * @param file The collected file (without its workspace folder).
 * @param targetFilePath Entry path inside the zip.
 * @param zip The zip stream to write into.
 * @param customizations Optional hooks; each one is skipped when absent.
 * @param options Accepted for signature parity with the other helpers; not read by this function.
 * @returns `{ result: 'added', addedBytes }` when written, or `{ result: 'skipped', reason }` when the file
 *     was excluded or reported as not found while writing.
 * @throws The error returned by `checkForError`, or any write error whose message does not contain
 *     'File not found'.
 */
export async function addFileToZip(
    file: Omit<CollectFilesResultItem, 'workspaceFolder'>,
    targetFilePath: string,
    zip: ZipStream,
    customizations?: ZipProjectCustomizations,
    options?: ZipProjectOptions
): Promise<ZipFileAddedResult | ZipFileSkippedResult> {
    if (customizations?.isExcluded?.(file)) {
        return { result: 'skipped', reason: 'excluded' }
    }

    const rejection = customizations?.checkForError?.(file)
    if (rejection) {
        throw rejection
    }

    try {
        if (!file.isText) {
            // NOTE(review): the entry path given here is the *directory* of targetFilePath, whereas the text
            // branch below uses the full targetFilePath. Confirm this matches what ZipStream.writeFile expects.
            zip.writeFile(file.fileUri.fsPath, path.dirname(targetFilePath))
        } else {
            // filepath will be out-of-sync for files with unsaved changes, so the collected content is written.
            zip.writeString(file.fileContent, targetFilePath)
        }
    } catch (error) {
        const isMissingFile = error instanceof Error && error.message.includes('File not found')
        if (!isMissingFile) {
            throw error
        }
        // No-op: Skip if file was deleted or does not exist
        // Reference: https://github.com/cthackers/adm-zip/blob/1cd32f7e0ad3c540142a76609bb538a5cda2292f/adm-zip.js#L296-L321
        return { result: 'skipped', reason: 'missing' }
    }

    // Side effects only run for files that were actually written.
    if (customizations?.computeSideEffects) {
        await customizations.computeSideEffects(file)
    }

    return { result: 'added', addedBytes: file.fileSizeBytes }
}

/**
 * Collects the project's files and adds each one to `zip`, one at a time.
 *
 * A file whose `zipFilePath` was already seen is ignored. The path is remembered before the add is
 * attempted, so a skipped file still blocks later duplicates of the same path.
 *
 * @param repoRootPaths Forwarded to `collectFiles`.
 * @param workspaceFolders Forwarded to `collectFiles`.
 * @param collectFilesOptions Forwarded to `collectFiles`.
 * @param zip The zip stream that receives the files; it is not finalized here.
 * @param customizations Optional per-file hooks forwarded to `addFileToZip`.
 * @param options Entry-path options (`includeProjectName`, `nonPosixPath`); also forwarded to `addFileToZip`.
 * @returns The same `zip` plus `totalBytesAdded`, the sum of `addedBytes` over all added files.
 */
export async function addProjectToZip(
    repoRootPaths: string[],
    workspaceFolders: CurrentWsFolders,
    collectFilesOptions: CollectFilesOptions,
    zip: ZipStream,
    customizations?: ZipProjectCustomizations,
    options?: ZipProjectOptions
) {
    const collected = await collectFiles(repoRootPaths, workspaceFolders, collectFilesOptions)
    const seenZipPaths = new Set<string>()
    let totalBytesAdded = 0

    for (const file of collected) {
        const alreadySeen = seenZipPaths.has(file.zipFilePath)
        if (alreadySeen) {
            continue
        }
        seenZipPaths.add(file.zipFilePath)

        let entryPath = file.zipFilePath
        if (options?.includeProjectName) {
            entryPath = path.join(path.basename(file.workspaceFolder.uri.fsPath), file.zipFilePath)
        }
        if (!options?.nonPosixPath) {
            // Paths in zip should be POSIX compliant regardless of OS
            // Reference: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
            entryPath = entryPath.split(path.sep).join(path.posix.sep)
        }

        const outcome = await addFileToZip(file, entryPath, zip, customizations, options)
        if (outcome.result === 'added') {
            totalBytesAdded += outcome.addedBytes
        }
    }

    return { zip, totalBytesAdded }
}

/**
 * Zips a project: adds all collected files to a zip stream, finalizes it and returns the result.
 *
 * @param repoRootPaths Forwarded to `addProjectToZip`.
 * @param workspaceFolders Forwarded to `addProjectToZip`.
 * @param collectFilesOptions Forwarded to `addProjectToZip`.
 * @param customizations Optional per-file hooks.
 * @param options Entry-path options, plus an optional existing `zip` to write into
 *     (a new `ZipStream` is created when none is given).
 * @returns The zip buffer, its checksum and the total size in bytes of the files that were added.
 */
export async function zipProject(
    repoRootPaths: string[],
    workspaceFolders: CurrentWsFolders,
    collectFilesOptions: CollectFilesOptions,
    customizations?: ZipProjectCustomizations,
    options?: ZipProjectOptions & { zip?: ZipStream }
): Promise<ZippedWorkspaceResult> {
    const targetZip = options?.zip ?? new ZipStream()
    const populated = await addProjectToZip(
        repoRootPaths,
        workspaceFolders,
        collectFilesOptions,
        targetZip,
        customizations,
        options
    )

    // Finalize only after every file has been handed to the zip.
    const finalized = await populated.zip.finalize()

    return {
        // Fall back to an empty buffer when the stream buffer yields no contents.
        zipFileBuffer: finalized.streamBuffer.getContents() || Buffer.from(''),
        zipFileChecksum: finalized.hash,
        totalFileBytes: populated.totalBytesAdded,
    }
}
// TODO: remove vscode dep
/**
 * Adds a single file to a freshly created `ZipStream` and reports whether it was added or skipped.
 *
 * NOTE(review): the `ZipStream` created here is neither finalized nor returned, so callers only
 * receive the add/skip outcome -- confirm this is intended.
 *
 * @param file Uri of the file to zip; its `fsPath` is used as the entry's `relativeFilePath`.
 * @param targetPath Used both as the entry's `zipFilePath` and as the target path inside the zip.
 * @param customizations Optional per-file hooks forwarded to `addFileToZip`.
 * @param options Forwarded to `addFileToZip`.
 * @returns The result of `addFileToZip` for this file.
 */
export async function zipFile(
    file: vscode.Uri,
    targetPath: string,
    customizations?: ZipProjectCustomizations,
    options?: ZipProjectOptions
) {
    const fileInfo = await getFileInfo(file, true)
    const zipEntry = {
        ...fileInfo,
        zipFilePath: targetPath,
        relativeFilePath: file.fsPath,
    }
    return await addFileToZip(zipEntry, targetPath, new ZipStream(), customizations, options)
}
2 changes: 1 addition & 1 deletion packages/core/src/amazonqDoc/session/sessionState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ export class DocPrepareCodeGenState extends BasePrepareCodeGenState {
) {
return await prepareRepoData(workspaceRoots, workspaceFolders, span, {
...options,
isIncludeInfraDiagram: true,
includeInfraDiagram: true,
})
}
}
Loading
Loading