Skip to content
41 changes: 20 additions & 21 deletions packages/core/src/codewhisperer/util/zipUtil.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
import admZip from 'adm-zip'
import * as vscode from 'vscode'
import path from 'path'
import { tempDirPath, testGenerationLogsDir } from '../../shared/filesystemUtilities'
Expand All @@ -25,6 +24,7 @@ import { ChildProcess, ChildProcessOptions } from '../../shared/utilities/proces
import { ProjectZipError } from '../../amazonqTest/error'
import { removeAnsi } from '../../shared/utilities/textUtilities'
import { normalize } from '../../shared/utilities/pathUtils'
import { ZipStream } from '../../shared/utilities/zipStream'

export interface ZipMetadata {
rootDir: string
Expand Down Expand Up @@ -109,7 +109,7 @@ export class ZipUtil {
if (!uri) {
throw new NoActiveFileError()
}
const zip = new admZip()
const zip = new ZipStream()

const content = await this.getTextContent(uri)

Expand All @@ -121,14 +121,14 @@ export class ZipUtil {
// Set includeWorkspaceFolder to false because we are already manually prepending the projectName
const relativePath = vscode.workspace.asRelativePath(uri, false)
const zipEntryPath = this.getZipEntryPath(projectName, relativePath)
zip.addFile(zipEntryPath, Buffer.from(content, 'utf-8'))
zip.writeString(content, zipEntryPath)

if (scope === CodeWhispererConstants.CodeAnalysisScope.FILE_ON_DEMAND) {
const gitDiffContent = `+++ b/${normalize(zipEntryPath)}` // Sending file path in payload for LLM code review
zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8'))
zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath)
}
} else {
zip.addFile(uri.fsPath, Buffer.from(content, 'utf-8'))
zip.writeString(content, uri.fsPath)
}

this._pickedSourceFiles.add(uri.fsPath)
Expand All @@ -139,7 +139,7 @@ export class ZipUtil {
throw new FileSizeExceededError()
}
const zipFilePath = this.getZipDirPath(FeatureUseCase.CODE_SCAN) + CodeWhispererConstants.codeScanZipExt
zip.writeZip(zipFilePath)
await zip.finalizeToFile(zipFilePath)
return zipFilePath
}

Expand Down Expand Up @@ -170,13 +170,13 @@ export class ZipUtil {
* await processMetadataDir(zip, '/path/to/directory');
* ```
*/
protected async processMetadataDir(zip: admZip, metadataDir: string) {
protected async processMetadataDir(zip: ZipStream, metadataDir: string) {
const metadataDirName = path.basename(metadataDir)
// Helper function to add empty directory to zip
const addEmptyDirectory = (dirPath: string) => {
const relativePath = path.relative(metadataDir, dirPath)
const pathWithMetadata = path.join(metadataDirName, relativePath, '/')
zip.addFile(pathWithMetadata, Buffer.from(''))
zip.writeString('', pathWithMetadata)
}

// Recursive function to process directories
Expand All @@ -189,11 +189,10 @@ export class ZipUtil {

if (fileType === vscode.FileType.File) {
try {
const fileContent = await vscode.workspace.fs.readFile(vscode.Uri.file(filePath))
const buffer = Buffer.from(fileContent)
const fileUri = vscode.Uri.file(filePath)
const relativePath = path.relative(metadataDir, filePath)
const pathWithMetadata = path.join(metadataDirName, relativePath)
zip.addFile(pathWithMetadata, buffer)
zip.writeFile(fileUri.fsPath, pathWithMetadata)
} catch (error) {
getLogger().error(`Failed to add file ${filePath} to zip: ${error}`)
}
Expand All @@ -207,7 +206,7 @@ export class ZipUtil {
}

protected async zipProject(useCase: FeatureUseCase, projectPath?: string, metadataDir?: string) {
const zip = new admZip()
const zip = new ZipStream()
let projectPaths = []
if (useCase === FeatureUseCase.TEST_GENERATION && projectPath) {
projectPaths.push(projectPath)
Expand All @@ -232,12 +231,12 @@ export class ZipUtil {
}
this._language = [...languageCount.entries()].reduce((a, b) => (b[1] > a[1] ? b : a))[0]
const zipFilePath = this.getZipDirPath(useCase) + CodeWhispererConstants.codeScanZipExt
zip.writeZip(zipFilePath)
await zip.finalizeToFile(zipFilePath)
return zipFilePath
}

protected async processCombinedGitDiff(
zip: admZip,
zip: ZipStream,
projectPaths: string[],
filePath?: string,
scope?: CodeWhispererConstants.CodeAnalysisScope
Expand All @@ -254,7 +253,7 @@ export class ZipUtil {
})
}
if (gitDiffContent) {
zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8'))
zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath)
}
}

Expand Down Expand Up @@ -403,7 +402,7 @@ export class ZipUtil {
}

protected async processSourceFiles(
zip: admZip,
zip: ZipStream,
languageCount: Map<CodewhispererLanguage, number>,
projectPaths: string[] | undefined,
useCase: FeatureUseCase
Expand Down Expand Up @@ -444,7 +443,7 @@ export class ZipUtil {
}
}

protected processOtherFiles(zip: admZip, languageCount: Map<CodewhispererLanguage, number>) {
protected processOtherFiles(zip: ZipStream, languageCount: Map<CodewhispererLanguage, number>) {
for (const document of vscode.workspace.textDocuments
.filter((document) => document.uri.scheme === 'file')
.filter((document) => vscode.workspace.getWorkspaceFolder(document.uri) === undefined)) {
Expand Down Expand Up @@ -474,7 +473,7 @@ export class ZipUtil {
}

protected processTextFile(
zip: admZip,
zip: ZipStream,
uri: vscode.Uri,
fileContent: string,
languageCount: Map<CodewhispererLanguage, number>,
Expand All @@ -493,10 +492,10 @@ export class ZipUtil {
this._totalLines += fileContent.split(ZipConstants.newlineRegex).length

this.incrementCountForLanguage(uri, languageCount)
zip.addFile(zipEntryPath, Buffer.from(fileContent, 'utf-8'))
zip.writeString(fileContent, zipEntryPath)
}

protected async processBinaryFile(zip: admZip, uri: vscode.Uri, zipEntryPath: string) {
protected async processBinaryFile(zip: ZipStream, uri: vscode.Uri, zipEntryPath: string) {
const fileSize = (await fs.stat(uri.fsPath)).size

if (
Expand All @@ -508,7 +507,7 @@ export class ZipUtil {
this._pickedSourceFiles.add(uri.fsPath)
this._totalSize += fileSize

zip.addLocalFile(uri.fsPath, path.dirname(zipEntryPath))
zip.writeFile(uri.fsPath, path.dirname(zipEntryPath))
}

protected incrementCountForLanguage(uri: vscode.Uri, languageCount: Map<CodewhispererLanguage, number>) {
Expand Down
65 changes: 43 additions & 22 deletions packages/core/src/shared/utilities/zipStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@
import { WritableStreamBuffer } from 'stream-buffers'
import crypto from 'crypto'
import { readFileAsString } from '../filesystemUtilities'
// Use require instead of import since this package doesn't support commonjs
const { ZipWriter, TextReader, ZipReader, Uint8ArrayReader } = require('@zip.js/zip.js')

// Does not offer CommonJS support officially: https://github.com/gildas-lormeau/zip.js/issues/362.
// Webpack appears to handle this for us expirementally.
// @ts-ignore
import { ZipWriter, TextReader, ZipReader, Uint8ArrayReader, EntryMetaData, Entry } from '@zip.js/zip.js'
import { getLogger } from '../logger/logger'
import fs from '../fs/fs'

export interface ZipStreamResult {
sizeInBytes: number
hash: string
streamBuffer: WritableStreamBuffer
}

export type ZipReaderResult = {
filename: string
}

export type ZipStreamProps = {
hashAlgorithm: 'md5' | 'sha256'
maxNumberOfFileStreams: number
Expand Down Expand Up @@ -49,8 +49,6 @@ const defaultProps: ZipStreamProps = {
* ```
*/
export class ZipStream {
// TypeScript compiler is confused about using ZipWriter as a type
// @ts-ignore
private _zipWriter: ZipWriter<WritableStream>
private _streamBuffer: WritableStreamBuffer
private _hasher: crypto.Hash
Expand All @@ -59,10 +57,11 @@ export class ZipStream {
private _filesToZip: [string, string][] = []
private _filesBeingZipped: number = 0
private _maxNumberOfFileStreams: number
boundFileCompletionCallback: (progress: number, total: number) => void
boundFileStartCallback: (totalBytes: number) => void
boundFileCompletionCallback: (computedSize: number) => Promise<void>
boundFileStartCallback: (computedSize: number) => Promise<void>

constructor(props: Partial<ZipStreamProps> = {}) {
getLogger().debug('Initializing ZipStream with props: %O', props)
// Allow any user-provided values to override default values
const mergedProps = { ...defaultProps, ...props }
const { hashAlgorithm, compressionLevel, maxNumberOfFileStreams } = mergedProps
Expand All @@ -86,11 +85,11 @@ export class ZipStream {
this._hasher = crypto.createHash(hashAlgorithm)
}

public onStartCompressingFile(totalBytes: number) {
public async onStartCompressingFile(_totalBytes: number): Promise<void> {
this._filesBeingZipped++
}

public onFinishedCompressingFile(computedsize: number) {
public async onFinishedCompressingFile(_computedsize: number) {
this._numberOfFilesSucceeded++
this._filesBeingZipped--

Expand All @@ -104,31 +103,44 @@ export class ZipStream {
})
}
}

public writeString(data: string, path: string) {
return this._zipWriter.add(path, new TextReader(data))
/**
* Writes data to the specified path.
* @param data
* @param path
* @param returnPromise optional parameter specifying if caller wants a promise.
* @returns promise to that resolves when data is written.
*/
public writeString(data: string, path: string, returnPromise: true): Promise<EntryMetaData>
public writeString(data: string, path: string, returnPromise?: false): void
public writeString(data: string, path: string, returnPromise?: boolean): void | Promise<EntryMetaData> {
const promise = this._zipWriter.add(path, new TextReader(data))
return returnPromise ? promise : undefined
}

public writeFile(file: string, path: string) {
/**
* Add the content for file to zip at path.
* @param sourceFilePath file to read
* @param targetFilePath path to write data to in zip.
*/
public writeFile(sourceFilePath: string, targetFilePath: string) {
// We use _numberOfFilesToStream to make sure we don't finalize too soon
// (before the progress event has been fired for the last file)
// The problem is that we can't rely on progress.entries.total,
// because files can be added to the queue faster
// than the progress event is fired
this._numberOfFilesToStream++
// console.log('this._numberOfFilesToStream is now', this._numberOfFilesToStream)
// We only start zipping another file if we're under our limit
// of concurrent file streams
if (this._filesBeingZipped < this._maxNumberOfFileStreams) {
void readFileAsString(file).then((content) => {
return this._zipWriter.add(path, new TextReader(content), {
void readFileAsString(sourceFilePath).then((content) => {
return this._zipWriter.add(targetFilePath, new TextReader(content), {
onend: this.boundFileCompletionCallback,
onstart: this.boundFileStartCallback,
})
})
} else {
// Queue it for later (see "write" event)
this._filesToZip.push([file, path])
this._filesToZip.push([sourceFilePath, targetFilePath])
}
}

Expand All @@ -148,14 +160,23 @@ export class ZipStream {
// We're done streaming all files, so we can close the zip stream

await this._zipWriter.close()
const sizeInBytes = this._streamBuffer.size()
getLogger().debug('Finished finalizing zipStream with %d bytes of data', sizeInBytes)
return {
sizeInBytes: this._streamBuffer.size(),
sizeInBytes,
hash: this._hasher.digest('base64'),
streamBuffer: this._streamBuffer,
}
}

public static async unzip(zipBuffer: Buffer): Promise<ZipReaderResult[]> {
public async finalizeToFile(targetPath: string, onProgress?: (percentComplete: number) => void) {
const result = await this.finalize(onProgress)
const contents = result.streamBuffer.getContents() || Buffer.from('')
await fs.writeFile(targetPath, contents)
return result
}

public static async unzip(zipBuffer: Buffer): Promise<Entry[]> {
const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(zipBuffer)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

off topic: this doesn't actually "stream" the result, it all goes into memory, right? that's something we need to address eventually.

Copy link
Contributor Author

@Hweinstock Hweinstock Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed this as well. Also, the current logic in zipUtils dumps this to the file system before uploading whereas prepareRepoData uploads the buffer itself.

If we can centralize the uploading logic (also has two completely separate implementations), this streaming change will be much easier.

try {
return await reader.getEntries()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ import assert from 'assert'
import vscode from 'vscode'
import sinon from 'sinon'
import { join } from 'path'
import { getTestWorkspaceFolder } from 'aws-core-vscode/test'
import { CodeAnalysisScope, CodeWhispererConstants, ZipUtil } from 'aws-core-vscode/codewhisperer'
import { codeScanTruncDirPrefix } from 'aws-core-vscode/codewhisperer'
import { tempDirPath, ToolkitError } from 'aws-core-vscode/shared'
import { LspClient } from 'aws-core-vscode/amazonq'
import { fs } from 'aws-core-vscode/shared'
import path from 'path'
import JSZip from 'jszip'
import { getTestWorkspaceFolder } from '../../testInteg/integrationTestsUtilities'
import { ZipUtil } from '../../codewhisperer/util/zipUtil'
import { CodeAnalysisScope, codeScanTruncDirPrefix } from '../../codewhisperer/models/constants'
import { ToolkitError } from '../../shared/errors'
import { fs } from '../../shared/fs/fs'
import { tempDirPath } from '../../shared/filesystemUtilities'
import { CodeWhispererConstants } from '../../codewhisperer/indexNode'
import { LspClient } from '../../amazonq/lsp/lspClient'

describe('zipUtil', function () {
const workspaceFolder = getTestWorkspaceFolder()
Expand Down
Loading
Loading