From 45412b947be9f51b464418b01650daea77ea3b67 Mon Sep 17 00:00:00 2001 From: hkobew Date: Wed, 19 Mar 2025 17:24:33 -0400 Subject: [PATCH 01/10] refactor: move zipUtil tests to same package as zipUtil --- .../src/test/codewhisperer}/zipUtil.test.ts | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) rename packages/{amazonq/test/unit/codewhisperer/util => core/src/test/codewhisperer}/zipUtil.test.ts (94%) diff --git a/packages/amazonq/test/unit/codewhisperer/util/zipUtil.test.ts b/packages/core/src/test/codewhisperer/zipUtil.test.ts similarity index 94% rename from packages/amazonq/test/unit/codewhisperer/util/zipUtil.test.ts rename to packages/core/src/test/codewhisperer/zipUtil.test.ts index 230e950b045..a82db4a6840 100644 --- a/packages/amazonq/test/unit/codewhisperer/util/zipUtil.test.ts +++ b/packages/core/src/test/codewhisperer/zipUtil.test.ts @@ -7,14 +7,16 @@ import assert from 'assert' import vscode from 'vscode' import sinon from 'sinon' import { join } from 'path' -import { getTestWorkspaceFolder } from 'aws-core-vscode/test' -import { CodeAnalysisScope, CodeWhispererConstants, ZipUtil } from 'aws-core-vscode/codewhisperer' -import { codeScanTruncDirPrefix } from 'aws-core-vscode/codewhisperer' -import { tempDirPath, ToolkitError } from 'aws-core-vscode/shared' -import { LspClient } from 'aws-core-vscode/amazonq' -import { fs } from 'aws-core-vscode/shared' import path from 'path' import JSZip from 'jszip' +import { getTestWorkspaceFolder } from '../../testInteg/integrationTestsUtilities' +import { ZipUtil } from '../../codewhisperer/util/zipUtil' +import { CodeAnalysisScope, codeScanTruncDirPrefix } from '../../codewhisperer/models/constants' +import { ToolkitError } from '../../shared/errors' +import { fs } from '../../shared/fs/fs' +import { tempDirPath } from '../../shared/filesystemUtilities' +import { CodeWhispererConstants } from '../../codewhisperer/indexNode' +import { LspClient } from '../../amazonq/lsp/lspClient' describe('zipUtil', function () { const workspaceFolder = getTestWorkspaceFolder() From 72846c010b4b30e67dc19bff58b00b23bd0f7bbf Mon Sep 17 00:00:00 2001 From: hkobew Date: Wed, 19 Mar 2025 17:40:46 -0400 Subject: [PATCH 02/10] deps: avoid using admZip in zipUtil --- .../core/src/codewhisperer/util/zipUtil.ts | 38 +++++++++---------- .../core/src/shared/utilities/zipStream.ts | 19 ++++++++++ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/packages/core/src/codewhisperer/util/zipUtil.ts b/packages/core/src/codewhisperer/util/zipUtil.ts index 00ee0ae053d..d2a5230dd96 100644 --- a/packages/core/src/codewhisperer/util/zipUtil.ts +++ b/packages/core/src/codewhisperer/util/zipUtil.ts @@ -2,7 +2,6 @@ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 */ -import admZip from 'adm-zip' import * as vscode from 'vscode' import path from 'path' import { tempDirPath, testGenerationLogsDir } from '../../shared/filesystemUtilities' @@ -25,6 +24,7 @@ import { ChildProcess, ChildProcessOptions } from '../../shared/utilities/proces import { ProjectZipError } from '../../amazonqTest/error' import { removeAnsi } from '../../shared/utilities/textUtilities' import { normalize } from '../../shared/utilities/pathUtils' +import { ZipStream } from '../../shared/utilities/zipStream' export interface ZipMetadata { rootDir: string @@ -109,7 +109,7 @@ export class ZipUtil { if (!uri) { throw new NoActiveFileError() } - const zip = new admZip() + const zip = new ZipStream() const content = await this.getTextContent(uri) @@ -121,14 +121,14 @@ export class ZipUtil { // Set includeWorkspaceFolder to false because we are already manually prepending the projectName const relativePath = vscode.workspace.asRelativePath(uri, false) const zipEntryPath = this.getZipEntryPath(projectName, relativePath) - zip.addFile(zipEntryPath, Buffer.from(content, 'utf-8')) + zip.writeString(content, zipEntryPath) if (scope === CodeWhispererConstants.CodeAnalysisScope.FILE_ON_DEMAND) { const gitDiffContent = `+++ b/${normalize(zipEntryPath)}` // Sending file path in payload for LLM code review - zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8')) + zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath) } } else { - zip.addFile(uri.fsPath, Buffer.from(content, 'utf-8')) + zip.writeString(content, uri.fsPath) } this._pickedSourceFiles.add(uri.fsPath) @@ -139,7 +139,7 @@ export class ZipUtil { throw new FileSizeExceededError() } const zipFilePath = this.getZipDirPath(FeatureUseCase.CODE_SCAN) + CodeWhispererConstants.codeScanZipExt - zip.writeZip(zipFilePath) + await zip.finalizeToFile(zipFilePath) return zipFilePath } @@ -170,13 +170,13 @@ export class ZipUtil { * await processMetadataDir(zip, '/path/to/directory'); * ``` */ - protected async processMetadataDir(zip: admZip, metadataDir: string) { + protected async processMetadataDir(zip: ZipStream, metadataDir: string) { const metadataDirName = path.basename(metadataDir) // Helper function to add empty directory to zip const addEmptyDirectory = (dirPath: string) => { const relativePath = path.relative(metadataDir, dirPath) const pathWithMetadata = path.join(metadataDirName, relativePath, '/') - zip.addFile(pathWithMetadata, Buffer.from('')) + zip.writeString('', pathWithMetadata) } // Recursive function to process directories @@ -193,7 +193,7 @@ export class ZipUtil { const buffer = Buffer.from(fileContent) const relativePath = path.relative(metadataDir, filePath) const pathWithMetadata = path.join(metadataDirName, relativePath) - zip.addFile(pathWithMetadata, buffer) + zip.writeData(buffer, pathWithMetadata) } catch (error) { getLogger().error(`Failed to add file ${filePath} to zip: ${error}`) } @@ -207,7 +207,7 @@ export class ZipUtil { } protected async zipProject(useCase: FeatureUseCase, projectPath?: string, metadataDir?: string) { - const zip = new admZip() + const zip = new ZipStream() let projectPaths = [] if (useCase === FeatureUseCase.TEST_GENERATION && projectPath) { projectPaths.push(projectPath) @@ -232,12 +232,12 @@ export class ZipUtil { } this._language = [...languageCount.entries()].reduce((a, b) => (b[1] > a[1] ? b : a))[0] const zipFilePath = this.getZipDirPath(useCase) + CodeWhispererConstants.codeScanZipExt - zip.writeZip(zipFilePath) + await zip.finalizeToFile(zipFilePath) return zipFilePath } protected async processCombinedGitDiff( - zip: admZip, + zip: ZipStream, projectPaths: string[], filePath?: string, scope?: CodeWhispererConstants.CodeAnalysisScope @@ -254,7 +254,7 @@ export class ZipUtil { }) } if (gitDiffContent) { - zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8')) + zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath) } } @@ -403,7 +403,7 @@ export class ZipUtil { } protected async processSourceFiles( - zip: admZip, + zip: ZipStream, languageCount: Map, projectPaths: string[] | undefined, useCase: FeatureUseCase @@ -444,7 +444,7 @@ export class ZipUtil { } } - protected processOtherFiles(zip: admZip, languageCount: Map) { + protected processOtherFiles(zip: ZipStream, languageCount: Map) { for (const document of vscode.workspace.textDocuments .filter((document) => document.uri.scheme === 'file') .filter((document) => vscode.workspace.getWorkspaceFolder(document.uri) === undefined)) { @@ -474,7 +474,7 @@ export class ZipUtil { } protected processTextFile( - zip: admZip, + zip: ZipStream, uri: vscode.Uri, fileContent: string, languageCount: Map, @@ -493,10 +493,10 @@ export class ZipUtil { this._totalLines += fileContent.split(ZipConstants.newlineRegex).length this.incrementCountForLanguage(uri, languageCount) - zip.addFile(zipEntryPath, Buffer.from(fileContent, 'utf-8')) + zip.writeString(fileContent, zipEntryPath) } - protected async processBinaryFile(zip: admZip, uri: vscode.Uri, zipEntryPath: string) { + protected async processBinaryFile(zip: ZipStream, uri: vscode.Uri, zipEntryPath: string) { const fileSize = (await fs.stat(uri.fsPath)).size if ( @@ -508,7 +508,7 @@ export class ZipUtil { this._pickedSourceFiles.add(uri.fsPath) this._totalSize += fileSize - zip.addLocalFile(uri.fsPath, path.dirname(zipEntryPath)) + zip.writeFile(uri.fsPath, path.dirname(zipEntryPath)) } protected incrementCountForLanguage(uri: vscode.Uri, languageCount: Map) { diff --git a/packages/core/src/shared/utilities/zipStream.ts b/packages/core/src/shared/utilities/zipStream.ts index e03e71cd7ff..027f1046b47 100644 --- a/packages/core/src/shared/utilities/zipStream.ts +++ b/packages/core/src/shared/utilities/zipStream.ts @@ -8,6 +8,7 @@ import { readFileAsString } from '../filesystemUtilities' // Use require instead of import since this package doesn't support commonjs const { ZipWriter, TextReader, ZipReader, Uint8ArrayReader } = require('@zip.js/zip.js') import { getLogger } from '../logger/logger' +import fs from '../fs/fs' export interface ZipStreamResult { sizeInBytes: number @@ -109,6 +110,16 @@ export class ZipStream { return this._zipWriter.add(path, new TextReader(data)) } + // TODO: add tests for this. + public writeData(data: Uint8Array, path: string) { + return this._zipWriter.add(path, new Uint8ArrayReader(data)) + } + + /** + * Add the content for file to zip at path. + * @param file file to read + * @param path path to write data to in zip. + */ public writeFile(file: string, path: string) { // We use _numberOfFilesToStream to make sure we don't finalize too soon // (before the progress event has been fired for the last file) @@ -155,6 +166,14 @@ export class ZipStream { } } + // TODO: add tests for this. + public async finalizeToFile(targetPath: string) { + const result = await this.finalize() + const contents = result.streamBuffer.getContents() || Buffer.from('') + await fs.writeFile(targetPath, contents) + return result + } + public static async unzip(zipBuffer: Buffer): Promise { const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(zipBuffer))) try { From 4802b56a03f8310cbbeca297c363106bf203ea7a Mon Sep 17 00:00:00 2001 From: hkobew Date: Wed, 19 Mar 2025 18:01:08 -0400 Subject: [PATCH 03/10] test: add tests for new zipStream fs --- .../core/src/shared/utilities/zipStream.ts | 14 ++++---- .../test/shared/utilities/zipStream.test.ts | 36 +++++++++++++++++-- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/packages/core/src/shared/utilities/zipStream.ts b/packages/core/src/shared/utilities/zipStream.ts index 027f1046b47..1fa8bd825a0 100644 --- a/packages/core/src/shared/utilities/zipStream.ts +++ b/packages/core/src/shared/utilities/zipStream.ts @@ -110,17 +110,16 @@ export class ZipStream { return this._zipWriter.add(path, new TextReader(data)) } - // TODO: add tests for this. public writeData(data: Uint8Array, path: string) { return this._zipWriter.add(path, new Uint8ArrayReader(data)) } /** * Add the content for file to zip at path. - * @param file file to read - * @param path path to write data to in zip. + * @param sourceFilePath file to read + * @param targetFilePath path to write data to in zip. */ - public writeFile(file: string, path: string) { + public writeFile(sourceFilePath: string, targetFilePath: string) { // We use _numberOfFilesToStream to make sure we don't finalize too soon // (before the progress event has been fired for the last file) // The problem is that we can't rely on progress.entries.total, @@ -131,15 +130,15 @@ export class ZipStream { // We only start zipping another file if we're under our limit // of concurrent file streams if (this._filesBeingZipped < this._maxNumberOfFileStreams) { - void readFileAsString(file).then((content) => { - return this._zipWriter.add(path, new TextReader(content), { + void readFileAsString(sourceFilePath).then((content) => { + return this._zipWriter.add(targetFilePath, new TextReader(content), { onend: this.boundFileCompletionCallback, onstart: this.boundFileStartCallback, }) }) } else { // Queue it for later (see "write" event) - this._filesToZip.push([file, path]) + this._filesToZip.push([sourceFilePath, targetFilePath]) } } @@ -166,7 +165,6 @@ export class ZipStream { } } - // TODO: add tests for this. public async finalizeToFile(targetPath: string) { const result = await this.finalize() const contents = result.streamBuffer.getContents() || Buffer.from('') diff --git a/packages/core/src/test/shared/utilities/zipStream.test.ts b/packages/core/src/test/shared/utilities/zipStream.test.ts index ada48c79ccf..fd3226b3181 100644 --- a/packages/core/src/test/shared/utilities/zipStream.test.ts +++ b/packages/core/src/test/shared/utilities/zipStream.test.ts @@ -21,7 +21,7 @@ describe('zipStream', function () { await fs.delete(tmpDir, { recursive: true }) }) - it('Should create a zip stream from text content', async function () { + it('should create a zip stream from text content', async function () { const zipStream = new ZipStream({ hashAlgorithm: 'md5' }) await zipStream.writeString('foo bar', 'file.txt') const result = await zipStream.finalize() @@ -39,7 +39,29 @@ describe('zipStream', function () { assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) }) - it('Should create a zip stream from file', async function () { + it('should create a zip stream from binary content', async function () { + const zipStream = new ZipStream({ hashAlgorithm: 'md5' }) + await zipStream.writeData(Buffer.from('foo bar'), 'file.txt') + const result = await zipStream.finalize() + + const zipBuffer = result.streamBuffer.getContents() + assert.ok(zipBuffer) + + const zipPath = path.join(tmpDir, 'test.zip') + await fs.writeFile(zipPath, zipBuffer) + const expectedMd5 = crypto + .createHash('md5') + .update(await fs.readFileBytes(zipPath)) + .digest('base64') + assert.strictEqual(result.hash, expectedMd5) + assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) + + const zipContents = await ZipStream.unzip(zipBuffer) + assert.strictEqual(zipContents.length, 1) + assert.strictEqual(zipContents[0].filename, 'file.txt') + }) + + it('should create a zip stream from file', async function () { const testFilePath = path.join(tmpDir, 'test.txt') await fs.writeFile(testFilePath, 'foo bar') @@ -71,4 +93,14 @@ describe('zipStream', function () { const zipEntries = await ZipStream.unzip(zipBuffer) assert.strictEqual(zipEntries[0].filename, 'file.txt') }) + + it('should write contents to file', async function () { + const zipStream = new ZipStream() + await zipStream.writeString('foo bar', 'file.txt') + await zipStream.writeData(Buffer.from('foo bar'), 'file2.txt') + const zipPath = path.join(tmpDir, 'test.zip') + const result = await zipStream.finalizeToFile(path.join(tmpDir, 'test.zip')) + + assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) + }) }) From ffcb8f9faf235b0c5307e8a02e4c8266d051b591 Mon Sep 17 00:00:00 2001 From: hkobew Date: Wed, 19 Mar 2025 18:10:58 -0400 Subject: [PATCH 04/10] test: remove duplicate test checks --- .../test/shared/utilities/zipStream.test.ts | 54 +++++++------------ 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/packages/core/src/test/shared/utilities/zipStream.test.ts b/packages/core/src/test/shared/utilities/zipStream.test.ts index fd3226b3181..28bb436d6ed 100644 --- a/packages/core/src/test/shared/utilities/zipStream.test.ts +++ b/packages/core/src/test/shared/utilities/zipStream.test.ts @@ -4,7 +4,7 @@ */ import assert from 'assert' -import { ZipStream } from '../../../shared/utilities/zipStream' +import { ZipStream, ZipStreamResult } from '../../../shared/utilities/zipStream' import { makeTemporaryToolkitFolder } from '../../../shared/filesystemUtilities' import path from 'path' import fs from '../../../shared/fs/fs' @@ -26,17 +26,7 @@ describe('zipStream', function () { await zipStream.writeString('foo bar', 'file.txt') const result = await zipStream.finalize() - const zipBuffer = result.streamBuffer.getContents() - assert.ok(zipBuffer) - - const zipPath = path.join(tmpDir, 'test.zip') - await fs.writeFile(zipPath, zipBuffer) - const expectedMd5 = crypto - .createHash('md5') - .update(await fs.readFileBytes(zipPath)) - .digest('base64') - assert.strictEqual(result.hash, expectedMd5) - assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) + await verifyResult(result, path.join(tmpDir, 'test.zip')) }) it('should create a zip stream from binary content', async function () { @@ -44,19 +34,9 @@ describe('zipStream', function () { await zipStream.writeData(Buffer.from('foo bar'), 'file.txt') const result = await zipStream.finalize() - const zipBuffer = result.streamBuffer.getContents() - assert.ok(zipBuffer) + await verifyResult(result, path.join(tmpDir, 'test.zip')) - const zipPath = path.join(tmpDir, 'test.zip') - await fs.writeFile(zipPath, zipBuffer) - const expectedMd5 = crypto - .createHash('md5') - .update(await fs.readFileBytes(zipPath)) - .digest('base64') - assert.strictEqual(result.hash, expectedMd5) - assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) - - const zipContents = await ZipStream.unzip(zipBuffer) + const zipContents = await ZipStream.unzip(result.streamBuffer.getContents() || Buffer.from('')) assert.strictEqual(zipContents.length, 1) assert.strictEqual(zipContents[0].filename, 'file.txt') }) @@ -69,18 +49,7 @@ describe('zipStream', function () { zipStream.writeFile(testFilePath, 'file.txt') const result = await zipStream.finalize() - const zipPath = path.join(tmpDir, 'test.zip') - - const zipBuffer = result.streamBuffer.getContents() - assert.ok(zipBuffer) - - await fs.writeFile(zipPath, zipBuffer) - const expectedMd5 = crypto - .createHash('md5') - .update(await fs.readFileBytes(zipPath)) - .digest('base64') - assert.strictEqual(result.hash, expectedMd5) - assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) + await verifyResult(result, path.join(tmpDir, 'test.zip')) }) it('should unzip from a buffer', async function () { @@ -104,3 +73,16 @@ describe('zipStream', function () { assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) }) }) + +async function verifyResult(result: ZipStreamResult, zipPath: string) { + const zipBuffer = result.streamBuffer.getContents() + assert.ok(zipBuffer) + + await fs.writeFile(zipPath, zipBuffer) + const expectedMd5 = crypto + .createHash('md5') + .update(await fs.readFileBytes(zipPath)) + .digest('base64') + assert.strictEqual(result.hash, expectedMd5) + assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) +} From ce84f6cfc57a62a9cb9cd1dd046782eb8d84f5a4 Mon Sep 17 00:00:00 2001 From: hkobew Date: Thu, 20 Mar 2025 11:41:14 -0400 Subject: [PATCH 05/10] deps: add types to zipStream --- .../core/src/codewhisperer/util/zipUtil.ts | 5 +-- .../core/src/shared/utilities/zipStream.ts | 45 +++++++++---------- .../test/shared/utilities/zipStream.test.ts | 19 ++------ 3 files changed, 27 insertions(+), 42 deletions(-) diff --git a/packages/core/src/codewhisperer/util/zipUtil.ts b/packages/core/src/codewhisperer/util/zipUtil.ts index d2a5230dd96..32687a6452c 100644 --- a/packages/core/src/codewhisperer/util/zipUtil.ts +++ b/packages/core/src/codewhisperer/util/zipUtil.ts @@ -189,11 +189,10 @@ export class ZipUtil { if (fileType === vscode.FileType.File) { try { - const fileContent = await vscode.workspace.fs.readFile(vscode.Uri.file(filePath)) - const buffer = Buffer.from(fileContent) + const fileUri = vscode.Uri.file(filePath) const relativePath = path.relative(metadataDir, filePath) const pathWithMetadata = path.join(metadataDirName, relativePath) - zip.writeData(buffer, pathWithMetadata) + zip.writeFile(fileUri.fsPath, pathWithMetadata) } catch (error) { getLogger().error(`Failed to add file ${filePath} to zip: ${error}`) } diff --git a/packages/core/src/shared/utilities/zipStream.ts b/packages/core/src/shared/utilities/zipStream.ts index 1fa8bd825a0..67c3e728932 100644 --- a/packages/core/src/shared/utilities/zipStream.ts +++ b/packages/core/src/shared/utilities/zipStream.ts @@ -5,8 +5,9 @@ import { WritableStreamBuffer } from 'stream-buffers' import crypto from 'crypto' import { readFileAsString } from '../filesystemUtilities' -// Use require instead of import since this package doesn't support commonjs -const { ZipWriter, TextReader, ZipReader, Uint8ArrayReader } = require('@zip.js/zip.js') + +// @ts-ignore +import { ZipWriter, TextReader, ZipReader, Uint8ArrayReader, EntryMetaData } from '@zip.js/zip.js' import { getLogger } from '../logger/logger' import fs from '../fs/fs' @@ -16,10 +17,6 @@ export interface ZipStreamResult { streamBuffer: WritableStreamBuffer } -export type ZipReaderResult = { - filename: string -} - export type ZipStreamProps = { hashAlgorithm: 'md5' | 'sha256' maxNumberOfFileStreams: number @@ -50,8 +47,6 @@ const defaultProps: ZipStreamProps = { * ``` */ export class ZipStream { - // TypeScript compiler is confused about using ZipWriter as a type - // @ts-ignore private _zipWriter: ZipWriter private _streamBuffer: WritableStreamBuffer private _hasher: crypto.Hash @@ -60,8 +55,8 @@ export class ZipStream { private _filesToZip: [string, string][] = [] private _filesBeingZipped: number = 0 private _maxNumberOfFileStreams: number - boundFileCompletionCallback: (progress: number, total: number) => void - boundFileStartCallback: (totalBytes: number) => void + boundFileCompletionCallback: (computedSize: number) => Promise + boundFileStartCallback: (computedSize: number) => Promise constructor(props: Partial = {}) { // Allow any user-provided values to override default values @@ -87,11 +82,11 @@ export class ZipStream { this._hasher = crypto.createHash(hashAlgorithm) } - public onStartCompressingFile(totalBytes: number) { + public async onStartCompressingFile(_totalBytes: number): Promise { this._filesBeingZipped++ } - public onFinishedCompressingFile(computedsize: number) { + public async onFinishedCompressingFile(_computedsize: number) { this._numberOfFilesSucceeded++ this._filesBeingZipped-- @@ -105,13 +100,18 @@ export class ZipStream { }) } } - - public writeString(data: string, path: string) { - return this._zipWriter.add(path, new TextReader(data)) - } - - public writeData(data: Uint8Array, path: string) { - return this._zipWriter.add(path, new Uint8ArrayReader(data)) + /** + * Writes data to the specified path. + * @param data + * @param path + * @param returnPromise optional parameter specifying if caller wants a promise. + * @returns promise to that resolves when data is written. + */ + public writeString(data: string, path: string, returnPromise: true): Promise + public writeString(data: string, path: string, returnPromise?: false): void + public writeString(data: string, path: string, returnPromise?: boolean): void | Promise { + const promise = this._zipWriter.add(path, new TextReader(data)) + return returnPromise ? promise : undefined } /** @@ -126,7 +126,6 @@ export class ZipStream { // because files can be added to the queue faster // than the progress event is fired this._numberOfFilesToStream++ - // console.log('this._numberOfFilesToStream is now', this._numberOfFilesToStream) // We only start zipping another file if we're under our limit // of concurrent file streams if (this._filesBeingZipped < this._maxNumberOfFileStreams) { @@ -165,14 +164,14 @@ export class ZipStream { } } - public async finalizeToFile(targetPath: string) { - const result = await this.finalize() + public async finalizeToFile(targetPath: string, onProgress?: (percentComplete: number) => void) { + const result = await this.finalize(onProgress) const contents = result.streamBuffer.getContents() || Buffer.from('') await fs.writeFile(targetPath, contents) return result } - public static async unzip(zipBuffer: Buffer): Promise { + public static async unzip(zipBuffer: Buffer) { const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(zipBuffer))) try { return await reader.getEntries() diff --git a/packages/core/src/test/shared/utilities/zipStream.test.ts b/packages/core/src/test/shared/utilities/zipStream.test.ts index 28bb436d6ed..351e3e36f90 100644 --- a/packages/core/src/test/shared/utilities/zipStream.test.ts +++ b/packages/core/src/test/shared/utilities/zipStream.test.ts @@ -23,24 +23,12 @@ describe('zipStream', function () { it('should create a zip stream from text content', async function () { const zipStream = new ZipStream({ hashAlgorithm: 'md5' }) - await zipStream.writeString('foo bar', 'file.txt') + await zipStream.writeString('foo bar', 'file.txt', true) const result = await zipStream.finalize() await verifyResult(result, path.join(tmpDir, 'test.zip')) }) - it('should create a zip stream from binary content', async function () { - const zipStream = new ZipStream({ hashAlgorithm: 'md5' }) - await zipStream.writeData(Buffer.from('foo bar'), 'file.txt') - const result = await zipStream.finalize() - - await verifyResult(result, path.join(tmpDir, 'test.zip')) - - const zipContents = await ZipStream.unzip(result.streamBuffer.getContents() || Buffer.from('')) - assert.strictEqual(zipContents.length, 1) - assert.strictEqual(zipContents[0].filename, 'file.txt') - }) - it('should create a zip stream from file', async function () { const testFilePath = path.join(tmpDir, 'test.txt') await fs.writeFile(testFilePath, 'foo bar') @@ -54,7 +42,7 @@ describe('zipStream', function () { it('should unzip from a buffer', async function () { const zipStream = new ZipStream() - await zipStream.writeString('foo bar', 'file.txt') + await zipStream.writeString('foo bar', 'file.txt', true) const result = await zipStream.finalize() const zipBuffer = result.streamBuffer.getContents() @@ -65,8 +53,7 @@ describe('zipStream', function () { it('should write contents to file', async function () { const zipStream = new ZipStream() - await zipStream.writeString('foo bar', 'file.txt') - await zipStream.writeData(Buffer.from('foo bar'), 'file2.txt') + await zipStream.writeString('foo bar', 'file.txt', true) const zipPath = path.join(tmpDir, 'test.zip') const result = await zipStream.finalizeToFile(path.join(tmpDir, 'test.zip')) From 6cdd1828ea9e2d299d1878bee40aa0dc45752518 Mon Sep 17 00:00:00 2001 From: hkobew Date: Thu, 20 Mar 2025 11:49:49 -0400 Subject: [PATCH 06/10] test: improve testing check --- .../core/src/test/shared/utilities/zipStream.test.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/core/src/test/shared/utilities/zipStream.test.ts b/packages/core/src/test/shared/utilities/zipStream.test.ts index 351e3e36f90..d035ea8517f 100644 --- a/packages/core/src/test/shared/utilities/zipStream.test.ts +++ b/packages/core/src/test/shared/utilities/zipStream.test.ts @@ -9,6 +9,8 @@ import { makeTemporaryToolkitFolder } from '../../../shared/filesystemUtilities' import path from 'path' import fs from '../../../shared/fs/fs' import crypto from 'crypto' +// @ts-ignore +import { BlobWriter } from '@zip.js/zip.js' describe('zipStream', function () { let tmpDir: string @@ -42,13 +44,19 @@ describe('zipStream', function () { it('should unzip from a buffer', async function () { const zipStream = new ZipStream() - await zipStream.writeString('foo bar', 'file.txt', true) + await zipStream.writeString('foo bar foo', 'file.txt', true) const result = await zipStream.finalize() const zipBuffer = result.streamBuffer.getContents() assert.ok(zipBuffer) const zipEntries = await ZipStream.unzip(zipBuffer) assert.strictEqual(zipEntries[0].filename, 'file.txt') + assert.strictEqual(zipEntries.length, 1) + + // Read data back from zip to verify + const data = await zipEntries[0].getData!(new BlobWriter('text/plain')) + const textData = await data.text() + assert.strictEqual(textData, 'foo bar foo') }) it('should write contents to file', async function () { From c3d1c8a448c1a3cba4d0c445a1d2df2a487bae5b Mon Sep 17 00:00:00 2001 From: hkobew Date: Thu, 20 Mar 2025 11:56:56 -0400 Subject: [PATCH 07/10] deps: add explicit typing information --- packages/core/src/shared/utilities/zipStream.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/shared/utilities/zipStream.ts b/packages/core/src/shared/utilities/zipStream.ts index 67c3e728932..a59cf4890ea 100644 --- a/packages/core/src/shared/utilities/zipStream.ts +++ b/packages/core/src/shared/utilities/zipStream.ts @@ -7,7 +7,7 @@ import crypto from 'crypto' import { readFileAsString } from '../filesystemUtilities' // @ts-ignore -import { ZipWriter, TextReader, ZipReader, Uint8ArrayReader, EntryMetaData } from '@zip.js/zip.js' +import { ZipWriter, TextReader, ZipReader, Uint8ArrayReader, EntryMetaData, Entry } from '@zip.js/zip.js' import { getLogger } from '../logger/logger' import fs from '../fs/fs' @@ -171,7 +171,7 @@ export class ZipStream { return result } - public static async unzip(zipBuffer: Buffer) { + public static async unzip(zipBuffer: Buffer): Promise { const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(zipBuffer))) try { return await reader.getEntries() From 5a651b46f29b774fd8fa7432de1bb14d0fda1439 Mon Sep 17 00:00:00 2001 From: hkobew Date: Thu, 20 Mar 2025 13:25:21 -0400 Subject: [PATCH 08/10] logs: add logs to zip stream process --- packages/core/src/shared/utilities/zipStream.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/core/src/shared/utilities/zipStream.ts b/packages/core/src/shared/utilities/zipStream.ts index a59cf4890ea..822796a8710 100644 --- a/packages/core/src/shared/utilities/zipStream.ts +++ b/packages/core/src/shared/utilities/zipStream.ts @@ -59,6 +59,7 @@ export class ZipStream { boundFileStartCallback: (computedSize: number) => Promise constructor(props: Partial = {}) { + getLogger().debug('Initializing ZipStream with props: %O', props) // Allow any user-provided values to override default values const mergedProps = { ...defaultProps, ...props } const { hashAlgorithm, compressionLevel, maxNumberOfFileStreams } = mergedProps @@ -157,8 +158,10 @@ export class ZipStream { // We're done streaming all files, so we can close the zip stream await this._zipWriter.close() + const sizeInBytes = this._streamBuffer.size() + getLogger().debug('Finished finalizing zipStream with %d bytes of data', sizeInBytes) return { - sizeInBytes: this._streamBuffer.size(), + sizeInBytes, hash: this._hasher.digest('base64'), streamBuffer: this._streamBuffer, } From 890238ac4624effea9a17c78270499c4a5398c2a Mon Sep 17 00:00:00 2001 From: hkobew Date: Thu, 20 Mar 2025 14:15:19 -0400 Subject: [PATCH 09/10] docs: add comment about workaround --- packages/core/src/shared/utilities/zipStream.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/core/src/shared/utilities/zipStream.ts b/packages/core/src/shared/utilities/zipStream.ts index 822796a8710..dda71a6fad4 100644 --- a/packages/core/src/shared/utilities/zipStream.ts +++ b/packages/core/src/shared/utilities/zipStream.ts @@ -6,6 +6,8 @@ import { WritableStreamBuffer } from 'stream-buffers' import crypto from 'crypto' import { readFileAsString } from '../filesystemUtilities' +// Does not offer CommonJS support officially: https://github.com/gildas-lormeau/zip.js/issues/362. +// Webpack appears to handle this for us expirementally. // @ts-ignore import { ZipWriter, TextReader, ZipReader, Uint8ArrayReader, EntryMetaData, Entry } from '@zip.js/zip.js' import { getLogger } from '../logger/logger' From f88c80b62ccacdea58a7b7d4810c8310a5199fbe Mon Sep 17 00:00:00 2001 From: hkobew Date: Fri, 21 Mar 2025 10:19:29 -0400 Subject: [PATCH 10/10] fix: move duplicate check to zipStream --- packages/core/src/amazonq/util/files.ts | 7 ------- .../core/src/shared/utilities/zipStream.ts | 7 +++++++ .../test/shared/utilities/zipStream.test.ts | 20 +++++++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/packages/core/src/amazonq/util/files.ts b/packages/core/src/amazonq/util/files.ts index f61d4632714..afa0b674928 100644 --- a/packages/core/src/amazonq/util/files.ts +++ b/packages/core/src/amazonq/util/files.ts @@ -95,14 +95,7 @@ export async function prepareRepoData( let totalBytes = 0 const ignoredExtensionMap = new Map() - const addedFilePaths = new Set() - for (const file of files) { - if (addedFilePaths.has(file.zipFilePath)) { - continue - } - addedFilePaths.add(file.zipFilePath) - let fileSize try { fileSize = (await fs.stat(file.fileUri)).size diff --git a/packages/core/src/shared/utilities/zipStream.ts b/packages/core/src/shared/utilities/zipStream.ts index dda71a6fad4..8f32dd2e633 100644 --- a/packages/core/src/shared/utilities/zipStream.ts +++ b/packages/core/src/shared/utilities/zipStream.ts @@ -57,6 +57,8 @@ export class ZipStream { private _filesToZip: [string, string][] = [] private _filesBeingZipped: number = 0 private _maxNumberOfFileStreams: number + private _targetFilesAdded: Set = new Set() + boundFileCompletionCallback: (computedSize: number) => Promise boundFileStartCallback: (computedSize: number) => Promise @@ -123,6 +125,11 @@ export class ZipStream { * @param targetFilePath path to write data to in zip. */ public writeFile(sourceFilePath: string, targetFilePath: string) { + if (this._targetFilesAdded.has(targetFilePath)) { + getLogger().debug(`Skipping duplicate file added to zip: ${targetFilePath}`) + return + } + this._targetFilesAdded.add(targetFilePath) // We use _numberOfFilesToStream to make sure we don't finalize too soon // (before the progress event has been fired for the last file) // The problem is that we can't rely on progress.entries.total, diff --git a/packages/core/src/test/shared/utilities/zipStream.test.ts b/packages/core/src/test/shared/utilities/zipStream.test.ts index d035ea8517f..9186872bc96 100644 --- a/packages/core/src/test/shared/utilities/zipStream.test.ts +++ b/packages/core/src/test/shared/utilities/zipStream.test.ts @@ -11,6 +11,7 @@ import fs from '../../../shared/fs/fs' import crypto from 'crypto' // @ts-ignore import { BlobWriter } from '@zip.js/zip.js' +import { assertLogsContain } from '../../globalSetup.test' describe('zipStream', function () { let tmpDir: string @@ -67,6 +68,25 @@ describe('zipStream', function () { assert.strictEqual(result.sizeInBytes, (await fs.stat(zipPath)).size) }) + + it('only writes to target paths once', async function () { + const testFilePath = path.join(tmpDir, 'test.txt') + await fs.writeFile(testFilePath, 'foo bar') + const testFilePath2 = path.join(tmpDir, 'test2.txt') + await fs.writeFile(testFilePath2, 'foo bar') + + const zipStream = new ZipStream() + zipStream.writeFile(testFilePath, 'file.txt') + zipStream.writeFile(testFilePath2, 'file.txt') + const result = await zipStream.finalize() + + const zipBuffer = result.streamBuffer.getContents() + assert.ok(zipBuffer) + const zipEntries = await ZipStream.unzip(zipBuffer) + + assert.strictEqual(zipEntries.length, 1) + assertLogsContain('duplicate file', false, 'debug') + }) }) async function verifyResult(result: ZipStreamResult, zipPath: string) {