Skip to content
38 changes: 19 additions & 19 deletions packages/core/src/codewhisperer/util/zipUtil.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
import admZip from 'adm-zip'
import * as vscode from 'vscode'
import path from 'path'
import { tempDirPath, testGenerationLogsDir } from '../../shared/filesystemUtilities'
Expand All @@ -25,6 +24,7 @@ import { ChildProcess, ChildProcessOptions } from '../../shared/utilities/proces
import { ProjectZipError } from '../../amazonqTest/error'
import { removeAnsi } from '../../shared/utilities/textUtilities'
import { normalize } from '../../shared/utilities/pathUtils'
import { ZipStream } from '../../shared/utilities/zipStream'

export interface ZipMetadata {
rootDir: string
Expand Down Expand Up @@ -109,7 +109,7 @@ export class ZipUtil {
if (!uri) {
throw new NoActiveFileError()
}
const zip = new admZip()
const zip = new ZipStream()

const content = await this.getTextContent(uri)

Expand All @@ -121,14 +121,14 @@ export class ZipUtil {
// Set includeWorkspaceFolder to false because we are already manually prepending the projectName
const relativePath = vscode.workspace.asRelativePath(uri, false)
const zipEntryPath = this.getZipEntryPath(projectName, relativePath)
zip.addFile(zipEntryPath, Buffer.from(content, 'utf-8'))
zip.writeString(content, zipEntryPath)

if (scope === CodeWhispererConstants.CodeAnalysisScope.FILE_ON_DEMAND) {
const gitDiffContent = `+++ b/${normalize(zipEntryPath)}` // Sending file path in payload for LLM code review
zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8'))
zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath)
}
} else {
zip.addFile(uri.fsPath, Buffer.from(content, 'utf-8'))
zip.writeString(content, uri.fsPath)
}

this._pickedSourceFiles.add(uri.fsPath)
Expand All @@ -139,7 +139,7 @@ export class ZipUtil {
throw new FileSizeExceededError()
}
const zipFilePath = this.getZipDirPath(FeatureUseCase.CODE_SCAN) + CodeWhispererConstants.codeScanZipExt
zip.writeZip(zipFilePath)
await zip.finalizeToFile(zipFilePath)
return zipFilePath
}

Expand Down Expand Up @@ -170,13 +170,13 @@ export class ZipUtil {
* await processMetadataDir(zip, '/path/to/directory');
* ```
*/
protected async processMetadataDir(zip: admZip, metadataDir: string) {
protected async processMetadataDir(zip: ZipStream, metadataDir: string) {
const metadataDirName = path.basename(metadataDir)
// Helper function to add empty directory to zip
const addEmptyDirectory = (dirPath: string) => {
const relativePath = path.relative(metadataDir, dirPath)
const pathWithMetadata = path.join(metadataDirName, relativePath, '/')
zip.addFile(pathWithMetadata, Buffer.from(''))
zip.writeString('', pathWithMetadata)
}

// Recursive function to process directories
Expand All @@ -193,7 +193,7 @@ export class ZipUtil {
const buffer = Buffer.from(fileContent)
const relativePath = path.relative(metadataDir, filePath)
const pathWithMetadata = path.join(metadataDirName, relativePath)
zip.addFile(pathWithMetadata, buffer)
zip.writeData(buffer, pathWithMetadata)
} catch (error) {
getLogger().error(`Failed to add file ${filePath} to zip: ${error}`)
}
Expand All @@ -207,7 +207,7 @@ export class ZipUtil {
}

protected async zipProject(useCase: FeatureUseCase, projectPath?: string, metadataDir?: string) {
const zip = new admZip()
const zip = new ZipStream()
let projectPaths = []
if (useCase === FeatureUseCase.TEST_GENERATION && projectPath) {
projectPaths.push(projectPath)
Expand All @@ -232,12 +232,12 @@ export class ZipUtil {
}
this._language = [...languageCount.entries()].reduce((a, b) => (b[1] > a[1] ? b : a))[0]
const zipFilePath = this.getZipDirPath(useCase) + CodeWhispererConstants.codeScanZipExt
zip.writeZip(zipFilePath)
await zip.finalizeToFile(zipFilePath)
return zipFilePath
}

protected async processCombinedGitDiff(
zip: admZip,
zip: ZipStream,
projectPaths: string[],
filePath?: string,
scope?: CodeWhispererConstants.CodeAnalysisScope
Expand All @@ -254,7 +254,7 @@ export class ZipUtil {
})
}
if (gitDiffContent) {
zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8'))
zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath)
}
}

Expand Down Expand Up @@ -403,7 +403,7 @@ export class ZipUtil {
}

protected async processSourceFiles(
zip: admZip,
zip: ZipStream,
languageCount: Map<CodewhispererLanguage, number>,
projectPaths: string[] | undefined,
useCase: FeatureUseCase
Expand Down Expand Up @@ -444,7 +444,7 @@ export class ZipUtil {
}
}

protected processOtherFiles(zip: admZip, languageCount: Map<CodewhispererLanguage, number>) {
protected processOtherFiles(zip: ZipStream, languageCount: Map<CodewhispererLanguage, number>) {
for (const document of vscode.workspace.textDocuments
.filter((document) => document.uri.scheme === 'file')
.filter((document) => vscode.workspace.getWorkspaceFolder(document.uri) === undefined)) {
Expand Down Expand Up @@ -474,7 +474,7 @@ export class ZipUtil {
}

protected processTextFile(
zip: admZip,
zip: ZipStream,
uri: vscode.Uri,
fileContent: string,
languageCount: Map<CodewhispererLanguage, number>,
Expand All @@ -493,10 +493,10 @@ export class ZipUtil {
this._totalLines += fileContent.split(ZipConstants.newlineRegex).length

this.incrementCountForLanguage(uri, languageCount)
zip.addFile(zipEntryPath, Buffer.from(fileContent, 'utf-8'))
zip.writeString(fileContent, zipEntryPath)
}

protected async processBinaryFile(zip: admZip, uri: vscode.Uri, zipEntryPath: string) {
protected async processBinaryFile(zip: ZipStream, uri: vscode.Uri, zipEntryPath: string) {
const fileSize = (await fs.stat(uri.fsPath)).size

if (
Expand All @@ -508,7 +508,7 @@ export class ZipUtil {
this._pickedSourceFiles.add(uri.fsPath)
this._totalSize += fileSize

zip.addLocalFile(uri.fsPath, path.dirname(zipEntryPath))
zip.writeFile(uri.fsPath, path.dirname(zipEntryPath))
}

protected incrementCountForLanguage(uri: vscode.Uri, languageCount: Map<CodewhispererLanguage, number>) {
Expand Down
25 changes: 21 additions & 4 deletions packages/core/src/shared/utilities/zipStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { readFileAsString } from '../filesystemUtilities'
// Use require instead of import since this package doesn't support commonjs
const { ZipWriter, TextReader, ZipReader, Uint8ArrayReader } = require('@zip.js/zip.js')
import { getLogger } from '../logger/logger'
import fs from '../fs/fs'

export interface ZipStreamResult {
sizeInBytes: number
Expand Down Expand Up @@ -109,7 +110,16 @@ export class ZipStream {
return this._zipWriter.add(path, new TextReader(data))
}

public writeFile(file: string, path: string) {
public writeData(data: Uint8Array, path: string) {
return this._zipWriter.add(path, new Uint8ArrayReader(data))
}

/**
* Add the content for file to zip at path.
* @param sourceFilePath file to read
* @param targetFilePath path to write data to in zip.
*/
public writeFile(sourceFilePath: string, targetFilePath: string) {
// We use _numberOfFilesToStream to make sure we don't finalize too soon
// (before the progress event has been fired for the last file)
// The problem is that we can't rely on progress.entries.total,
Expand All @@ -120,15 +130,15 @@ export class ZipStream {
// We only start zipping another file if we're under our limit
// of concurrent file streams
if (this._filesBeingZipped < this._maxNumberOfFileStreams) {
void readFileAsString(file).then((content) => {
return this._zipWriter.add(path, new TextReader(content), {
void readFileAsString(sourceFilePath).then((content) => {
return this._zipWriter.add(targetFilePath, new TextReader(content), {
onend: this.boundFileCompletionCallback,
onstart: this.boundFileStartCallback,
})
})
} else {
// Queue it for later (see "write" event)
this._filesToZip.push([file, path])
this._filesToZip.push([sourceFilePath, targetFilePath])
}
}

Expand All @@ -155,6 +165,13 @@ export class ZipStream {
}
}

public async finalizeToFile(targetPath: string) {
const result = await this.finalize()
const contents = result.streamBuffer.getContents() || Buffer.from('')
await fs.writeFile(targetPath, contents)
return result
}

public static async unzip(zipBuffer: Buffer): Promise<ZipReaderResult[]> {
const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(zipBuffer)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

off topic: this doesn't actually "stream" the result, it all goes into memory, right? that's something we need to address eventually.

Copy link
Contributor Author

@Hweinstock Hweinstock Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed this as well. Also, the current logic in zipUtils dumps this to the file system before uploading whereas prepareRepoData uploads the buffer itself.

If we can centralize the uploading logic (also has two completely separate implementations), this streaming change will be much easier.

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ import assert from 'assert'
import vscode from 'vscode'
import sinon from 'sinon'
import { join } from 'path'
import { getTestWorkspaceFolder } from 'aws-core-vscode/test'
import { CodeAnalysisScope, CodeWhispererConstants, ZipUtil } from 'aws-core-vscode/codewhisperer'
import { codeScanTruncDirPrefix } from 'aws-core-vscode/codewhisperer'
import { tempDirPath, ToolkitError } from 'aws-core-vscode/shared'
import { LspClient } from 'aws-core-vscode/amazonq'
import { fs } from 'aws-core-vscode/shared'
import path from 'path'
import JSZip from 'jszip'
import { getTestWorkspaceFolder } from '../../testInteg/integrationTestsUtilities'
import { ZipUtil } from '../../codewhisperer/util/zipUtil'
import { CodeAnalysisScope, codeScanTruncDirPrefix } from '../../codewhisperer/models/constants'
import { ToolkitError } from '../../shared/errors'
import { fs } from '../../shared/fs/fs'
import { tempDirPath } from '../../shared/filesystemUtilities'
import { CodeWhispererConstants } from '../../codewhisperer/indexNode'
import { LspClient } from '../../amazonq/lsp/lspClient'

describe('zipUtil', function () {
const workspaceFolder = getTestWorkspaceFolder()
Expand Down
64 changes: 39 additions & 25 deletions packages/core/src/test/shared/utilities/zipStream.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

import assert from 'assert'
import { ZipStream } from '../../../shared/utilities/zipStream'
import { ZipStream, ZipStreamResult } from '../../../shared/utilities/zipStream'
import { makeTemporaryToolkitFolder } from '../../../shared/filesystemUtilities'
import path from 'path'
import fs from '../../../shared/fs/fs'
Expand All @@ -21,44 +21,35 @@ describe('zipStream', function () {
await fs.delete(tmpDir, { recursive: true })
})

it('Should create a zip stream from text content', async function () {
it('should create a zip stream from text content', async function () {
const zipStream = new ZipStream({ hashAlgorithm: 'md5' })
await zipStream.writeString('foo bar', 'file.txt')
const result = await zipStream.finalize()

const zipBuffer = result.streamBuffer.getContents()
assert.ok(zipBuffer)
await verifyResult(result, path.join(tmpDir, 'test.zip'))
})

const zipPath = path.join(tmpDir, 'test.zip')
await fs.writeFile(zipPath, zipBuffer)
const expectedMd5 = crypto
.createHash('md5')
.update(await fs.readFileBytes(zipPath))
.digest('base64')
assert.strictEqual(result.hash, expectedMd5)
assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size)
it('should create a zip stream from binary content', async function () {
const zipStream = new ZipStream({ hashAlgorithm: 'md5' })
await zipStream.writeData(Buffer.from('foo bar'), 'file.txt')
const result = await zipStream.finalize()

await verifyResult(result, path.join(tmpDir, 'test.zip'))

const zipContents = await ZipStream.unzip(result.streamBuffer.getContents() || Buffer.from(''))
assert.strictEqual(zipContents.length, 1)
assert.strictEqual(zipContents[0].filename, 'file.txt')
})

it('Should create a zip stream from file', async function () {
it('should create a zip stream from file', async function () {
const testFilePath = path.join(tmpDir, 'test.txt')
await fs.writeFile(testFilePath, 'foo bar')

const zipStream = new ZipStream({ hashAlgorithm: 'md5' })
zipStream.writeFile(testFilePath, 'file.txt')
const result = await zipStream.finalize()

const zipPath = path.join(tmpDir, 'test.zip')

const zipBuffer = result.streamBuffer.getContents()
assert.ok(zipBuffer)

await fs.writeFile(zipPath, zipBuffer)
const expectedMd5 = crypto
.createHash('md5')
.update(await fs.readFileBytes(zipPath))
.digest('base64')
assert.strictEqual(result.hash, expectedMd5)
assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size)
await verifyResult(result, path.join(tmpDir, 'test.zip'))
})

it('should unzip from a buffer', async function () {
Expand All @@ -71,4 +62,27 @@ describe('zipStream', function () {
const zipEntries = await ZipStream.unzip(zipBuffer)
assert.strictEqual(zipEntries[0].filename, 'file.txt')
})

it('should write contents to file', async function () {
const zipStream = new ZipStream()
await zipStream.writeString('foo bar', 'file.txt')
await zipStream.writeData(Buffer.from('foo bar'), 'file2.txt')
const zipPath = path.join(tmpDir, 'test.zip')
const result = await zipStream.finalizeToFile(path.join(tmpDir, 'test.zip'))

assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size)
})
})

async function verifyResult(result: ZipStreamResult, zipPath: string) {
const zipBuffer = result.streamBuffer.getContents()
assert.ok(zipBuffer)

await fs.writeFile(zipPath, zipBuffer)
const expectedMd5 = crypto
.createHash('md5')
.update(await fs.readFileBytes(zipPath))
.digest('base64')
assert.strictEqual(result.hash, expectedMd5)
assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size)
}
Loading