Skip to content

Commit 4459171

Browse files
authored
deps(ziputil): migrate ziputil to use a web-safe and typed ZipStream (aws#6816)
## Problem This PR solves two very related problems so there are done together in one PR: 1) ZipUtil uses `admZip` which not web compatible. Also, the larger issue is that `ZipUtil` duplicates `prepareRepoData` in some ways, and it is difficult to align their implementations if they use different libraries. 2) The [ZipStream](https://github.com/aws/aws-toolkit-vscode/blob/bd378854550362810170a4d9164c426e622de256/packages/core/src/shared/utilities/zipStream.ts#L51) utility is a wrapper around the `zip.js` library. However, it imports it via `CommonJS` `require` syntax causing us to lose type information. This is dangerous because we are assuming properties and functions exist within the library, and are blind to changes in the underlying library. This makes the `ZipStream` module difficult and unsafe to work with. ## Solution - For Problem 1: - migrate `ZipUtil` away from `admZip`. - move `ZipUtil` tests to live in `core` package since `ZipUtil` lives there. - add some utility functions to `ZipStream` + tests for them. - For Problem 2: - Import `zip.js` as a module and tell the compiler to ignore it. It looks like this happens because `zip.js` doesn't support CommonJS, and in general doesn't support Node officially (source: gildas-lormeau/zip.js#362). However, a future commit appears to add this support (on that same issue) so unsure why we still get the error. Experimentally this is working, so its likely Webpack handles this for us. - This requires updating some type signatures and also reveals some slight bugs in the implementation. also addresses: aws#6566 ## Verification - Used /review on a midsize project and it produced the same results on this and previous release (without changes). --- - Treat all work as PUBLIC. Private `feature/x` branches will not be squash-merged at release time. - Your code changes must meet the guidelines in [CONTRIBUTING.md](https://github.com/aws/aws-toolkit-vscode/blob/master/CONTRIBUTING.md#guidelines). - License: I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent db491de commit 4459171

File tree

5 files changed

+132
-81
lines changed

5 files changed

+132
-81
lines changed

packages/core/src/amazonq/util/files.ts

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,7 @@ export async function prepareRepoData(
9595

9696
let totalBytes = 0
9797
const ignoredExtensionMap = new Map<string, number>()
98-
const addedFilePaths = new Set()
99-
10098
for (const file of files) {
101-
if (addedFilePaths.has(file.zipFilePath)) {
102-
continue
103-
}
104-
addedFilePaths.add(file.zipFilePath)
105-
10699
let fileSize
107100
try {
108101
fileSize = (await fs.stat(file.fileUri)).size

packages/core/src/codewhisperer/util/zipUtil.ts

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
5-
import admZip from 'adm-zip'
65
import * as vscode from 'vscode'
76
import path from 'path'
87
import { tempDirPath, testGenerationLogsDir } from '../../shared/filesystemUtilities'
@@ -25,6 +24,7 @@ import { ChildProcess, ChildProcessOptions } from '../../shared/utilities/proces
2524
import { ProjectZipError } from '../../amazonqTest/error'
2625
import { removeAnsi } from '../../shared/utilities/textUtilities'
2726
import { normalize } from '../../shared/utilities/pathUtils'
27+
import { ZipStream } from '../../shared/utilities/zipStream'
2828

2929
export interface ZipMetadata {
3030
rootDir: string
@@ -109,7 +109,7 @@ export class ZipUtil {
109109
if (!uri) {
110110
throw new NoActiveFileError()
111111
}
112-
const zip = new admZip()
112+
const zip = new ZipStream()
113113

114114
const content = await this.getTextContent(uri)
115115

@@ -121,14 +121,14 @@ export class ZipUtil {
121121
// Set includeWorkspaceFolder to false because we are already manually prepending the projectName
122122
const relativePath = vscode.workspace.asRelativePath(uri, false)
123123
const zipEntryPath = this.getZipEntryPath(projectName, relativePath)
124-
zip.addFile(zipEntryPath, Buffer.from(content, 'utf-8'))
124+
zip.writeString(content, zipEntryPath)
125125

126126
if (scope === CodeWhispererConstants.CodeAnalysisScope.FILE_ON_DEMAND) {
127127
const gitDiffContent = `+++ b/${normalize(zipEntryPath)}` // Sending file path in payload for LLM code review
128-
zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8'))
128+
zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath)
129129
}
130130
} else {
131-
zip.addFile(uri.fsPath, Buffer.from(content, 'utf-8'))
131+
zip.writeString(content, uri.fsPath)
132132
}
133133

134134
this._pickedSourceFiles.add(uri.fsPath)
@@ -139,7 +139,7 @@ export class ZipUtil {
139139
throw new FileSizeExceededError()
140140
}
141141
const zipFilePath = this.getZipDirPath(FeatureUseCase.CODE_SCAN) + CodeWhispererConstants.codeScanZipExt
142-
zip.writeZip(zipFilePath)
142+
await zip.finalizeToFile(zipFilePath)
143143
return zipFilePath
144144
}
145145

@@ -170,13 +170,13 @@ export class ZipUtil {
170170
* await processMetadataDir(zip, '/path/to/directory');
171171
* ```
172172
*/
173-
protected async processMetadataDir(zip: admZip, metadataDir: string) {
173+
protected async processMetadataDir(zip: ZipStream, metadataDir: string) {
174174
const metadataDirName = path.basename(metadataDir)
175175
// Helper function to add empty directory to zip
176176
const addEmptyDirectory = (dirPath: string) => {
177177
const relativePath = path.relative(metadataDir, dirPath)
178178
const pathWithMetadata = path.join(metadataDirName, relativePath, '/')
179-
zip.addFile(pathWithMetadata, Buffer.from(''))
179+
zip.writeString('', pathWithMetadata)
180180
}
181181

182182
// Recursive function to process directories
@@ -189,11 +189,10 @@ export class ZipUtil {
189189

190190
if (fileType === vscode.FileType.File) {
191191
try {
192-
const fileContent = await vscode.workspace.fs.readFile(vscode.Uri.file(filePath))
193-
const buffer = Buffer.from(fileContent)
192+
const fileUri = vscode.Uri.file(filePath)
194193
const relativePath = path.relative(metadataDir, filePath)
195194
const pathWithMetadata = path.join(metadataDirName, relativePath)
196-
zip.addFile(pathWithMetadata, buffer)
195+
zip.writeFile(fileUri.fsPath, pathWithMetadata)
197196
} catch (error) {
198197
getLogger().error(`Failed to add file ${filePath} to zip: ${error}`)
199198
}
@@ -207,7 +206,7 @@ export class ZipUtil {
207206
}
208207

209208
protected async zipProject(useCase: FeatureUseCase, projectPath?: string, metadataDir?: string) {
210-
const zip = new admZip()
209+
const zip = new ZipStream()
211210
let projectPaths = []
212211
if (useCase === FeatureUseCase.TEST_GENERATION && projectPath) {
213212
projectPaths.push(projectPath)
@@ -232,12 +231,12 @@ export class ZipUtil {
232231
}
233232
this._language = [...languageCount.entries()].reduce((a, b) => (b[1] > a[1] ? b : a))[0]
234233
const zipFilePath = this.getZipDirPath(useCase) + CodeWhispererConstants.codeScanZipExt
235-
zip.writeZip(zipFilePath)
234+
await zip.finalizeToFile(zipFilePath)
236235
return zipFilePath
237236
}
238237

239238
protected async processCombinedGitDiff(
240-
zip: admZip,
239+
zip: ZipStream,
241240
projectPaths: string[],
242241
filePath?: string,
243242
scope?: CodeWhispererConstants.CodeAnalysisScope
@@ -254,7 +253,7 @@ export class ZipUtil {
254253
})
255254
}
256255
if (gitDiffContent) {
257-
zip.addFile(ZipConstants.codeDiffFilePath, Buffer.from(gitDiffContent, 'utf-8'))
256+
zip.writeString(gitDiffContent, ZipConstants.codeDiffFilePath)
258257
}
259258
}
260259

@@ -403,7 +402,7 @@ export class ZipUtil {
403402
}
404403

405404
protected async processSourceFiles(
406-
zip: admZip,
405+
zip: ZipStream,
407406
languageCount: Map<CodewhispererLanguage, number>,
408407
projectPaths: string[] | undefined,
409408
useCase: FeatureUseCase
@@ -444,7 +443,7 @@ export class ZipUtil {
444443
}
445444
}
446445

447-
protected processOtherFiles(zip: admZip, languageCount: Map<CodewhispererLanguage, number>) {
446+
protected processOtherFiles(zip: ZipStream, languageCount: Map<CodewhispererLanguage, number>) {
448447
for (const document of vscode.workspace.textDocuments
449448
.filter((document) => document.uri.scheme === 'file')
450449
.filter((document) => vscode.workspace.getWorkspaceFolder(document.uri) === undefined)) {
@@ -474,7 +473,7 @@ export class ZipUtil {
474473
}
475474

476475
protected processTextFile(
477-
zip: admZip,
476+
zip: ZipStream,
478477
uri: vscode.Uri,
479478
fileContent: string,
480479
languageCount: Map<CodewhispererLanguage, number>,
@@ -493,10 +492,10 @@ export class ZipUtil {
493492
this._totalLines += fileContent.split(ZipConstants.newlineRegex).length
494493

495494
this.incrementCountForLanguage(uri, languageCount)
496-
zip.addFile(zipEntryPath, Buffer.from(fileContent, 'utf-8'))
495+
zip.writeString(fileContent, zipEntryPath)
497496
}
498497

499-
protected async processBinaryFile(zip: admZip, uri: vscode.Uri, zipEntryPath: string) {
498+
protected async processBinaryFile(zip: ZipStream, uri: vscode.Uri, zipEntryPath: string) {
500499
const fileSize = (await fs.stat(uri.fsPath)).size
501500

502501
if (
@@ -508,7 +507,7 @@ export class ZipUtil {
508507
this._pickedSourceFiles.add(uri.fsPath)
509508
this._totalSize += fileSize
510509

511-
zip.addLocalFile(uri.fsPath, path.dirname(zipEntryPath))
510+
zip.writeFile(uri.fsPath, path.dirname(zipEntryPath))
512511
}
513512

514513
protected incrementCountForLanguage(uri: vscode.Uri, languageCount: Map<CodewhispererLanguage, number>) {

packages/core/src/shared/utilities/zipStream.ts

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,20 @@
55
import { WritableStreamBuffer } from 'stream-buffers'
66
import crypto from 'crypto'
77
import { readFileAsString } from '../filesystemUtilities'
8-
// Use require instead of import since this package doesn't support commonjs
9-
const { ZipWriter, TextReader, ZipReader, Uint8ArrayReader } = require('@zip.js/zip.js')
8+
9+
// Does not offer CommonJS support officially: https://github.com/gildas-lormeau/zip.js/issues/362.
10+
// Webpack appears to handle this for us expirementally.
11+
// @ts-ignore
12+
import { ZipWriter, TextReader, ZipReader, Uint8ArrayReader, EntryMetaData, Entry } from '@zip.js/zip.js'
1013
import { getLogger } from '../logger/logger'
14+
import fs from '../fs/fs'
1115

1216
export interface ZipStreamResult {
1317
sizeInBytes: number
1418
hash: string
1519
streamBuffer: WritableStreamBuffer
1620
}
1721

18-
export type ZipReaderResult = {
19-
filename: string
20-
}
21-
2222
export type ZipStreamProps = {
2323
hashAlgorithm: 'md5' | 'sha256'
2424
maxNumberOfFileStreams: number
@@ -49,8 +49,6 @@ const defaultProps: ZipStreamProps = {
4949
* ```
5050
*/
5151
export class ZipStream {
52-
// TypeScript compiler is confused about using ZipWriter as a type
53-
// @ts-ignore
5452
private _zipWriter: ZipWriter<WritableStream>
5553
private _streamBuffer: WritableStreamBuffer
5654
private _hasher: crypto.Hash
@@ -59,10 +57,13 @@ export class ZipStream {
5957
private _filesToZip: [string, string][] = []
6058
private _filesBeingZipped: number = 0
6159
private _maxNumberOfFileStreams: number
62-
boundFileCompletionCallback: (progress: number, total: number) => void
63-
boundFileStartCallback: (totalBytes: number) => void
60+
private _targetFilesAdded: Set<string> = new Set()
61+
62+
boundFileCompletionCallback: (computedSize: number) => Promise<void>
63+
boundFileStartCallback: (computedSize: number) => Promise<void>
6464

6565
constructor(props: Partial<ZipStreamProps> = {}) {
66+
getLogger().debug('Initializing ZipStream with props: %O', props)
6667
// Allow any user-provided values to override default values
6768
const mergedProps = { ...defaultProps, ...props }
6869
const { hashAlgorithm, compressionLevel, maxNumberOfFileStreams } = mergedProps
@@ -86,11 +87,11 @@ export class ZipStream {
8687
this._hasher = crypto.createHash(hashAlgorithm)
8788
}
8889

89-
public onStartCompressingFile(totalBytes: number) {
90+
public async onStartCompressingFile(_totalBytes: number): Promise<void> {
9091
this._filesBeingZipped++
9192
}
9293

93-
public onFinishedCompressingFile(computedsize: number) {
94+
public async onFinishedCompressingFile(_computedsize: number) {
9495
this._numberOfFilesSucceeded++
9596
this._filesBeingZipped--
9697

@@ -104,31 +105,49 @@ export class ZipStream {
104105
})
105106
}
106107
}
107-
108-
public writeString(data: string, path: string) {
109-
return this._zipWriter.add(path, new TextReader(data))
108+
/**
109+
* Writes data to the specified path.
110+
* @param data
111+
* @param path
112+
* @param returnPromise optional parameter specifying if caller wants a promise.
113+
* @returns promise to that resolves when data is written.
114+
*/
115+
public writeString(data: string, path: string, returnPromise: true): Promise<EntryMetaData>
116+
public writeString(data: string, path: string, returnPromise?: false): void
117+
public writeString(data: string, path: string, returnPromise?: boolean): void | Promise<EntryMetaData> {
118+
const promise = this._zipWriter.add(path, new TextReader(data))
119+
return returnPromise ? promise : undefined
110120
}
111121

112-
public writeFile(file: string, path: string) {
122+
/**
123+
* Add the content for file to zip at path.
124+
* @param sourceFilePath file to read
125+
* @param targetFilePath path to write data to in zip.
126+
*/
127+
public writeFile(sourceFilePath: string, targetFilePath: string) {
128+
if (this._targetFilesAdded.has(targetFilePath)) {
129+
getLogger().debug(`Skipping duplicate file added to zip: ${targetFilePath}`)
130+
return
131+
}
132+
this._targetFilesAdded.add(targetFilePath)
113133
// We use _numberOfFilesToStream to make sure we don't finalize too soon
114134
// (before the progress event has been fired for the last file)
115135
// The problem is that we can't rely on progress.entries.total,
116136
// because files can be added to the queue faster
117137
// than the progress event is fired
118138
this._numberOfFilesToStream++
119-
// console.log('this._numberOfFilesToStream is now', this._numberOfFilesToStream)
120139
// We only start zipping another file if we're under our limit
121140
// of concurrent file streams
122141
if (this._filesBeingZipped < this._maxNumberOfFileStreams) {
123-
void readFileAsString(file).then((content) => {
124-
return this._zipWriter.add(path, new TextReader(content), {
142+
void readFileAsString(sourceFilePath).then((content) => {
143+
return this._zipWriter.add(targetFilePath, new TextReader(content), {
125144
onend: this.boundFileCompletionCallback,
126145
onstart: this.boundFileStartCallback,
127146
})
128147
})
129148
} else {
130149
// Queue it for later (see "write" event)
131-
this._filesToZip.push([file, path])
150+
this._filesToZip.push([sourceFilePath, targetFilePath])
132151
}
133152
}
134153

@@ -148,14 +167,23 @@ export class ZipStream {
148167
// We're done streaming all files, so we can close the zip stream
149168

150169
await this._zipWriter.close()
170+
const sizeInBytes = this._streamBuffer.size()
171+
getLogger().debug('Finished finalizing zipStream with %d bytes of data', sizeInBytes)
151172
return {
152-
sizeInBytes: this._streamBuffer.size(),
173+
sizeInBytes,
153174
hash: this._hasher.digest('base64'),
154175
streamBuffer: this._streamBuffer,
155176
}
156177
}
157178

158-
public static async unzip(zipBuffer: Buffer): Promise<ZipReaderResult[]> {
179+
public async finalizeToFile(targetPath: string, onProgress?: (percentComplete: number) => void) {
180+
const result = await this.finalize(onProgress)
181+
const contents = result.streamBuffer.getContents() || Buffer.from('')
182+
await fs.writeFile(targetPath, contents)
183+
return result
184+
}
185+
186+
public static async unzip(zipBuffer: Buffer): Promise<Entry[]> {
159187
const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(zipBuffer)))
160188
try {
161189
return await reader.getEntries()

packages/amazonq/test/unit/codewhisperer/util/zipUtil.test.ts renamed to packages/core/src/test/codewhisperer/zipUtil.test.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,16 @@ import assert from 'assert'
77
import vscode from 'vscode'
88
import sinon from 'sinon'
99
import { join } from 'path'
10-
import { getTestWorkspaceFolder } from 'aws-core-vscode/test'
11-
import { CodeAnalysisScope, CodeWhispererConstants, ZipUtil } from 'aws-core-vscode/codewhisperer'
12-
import { codeScanTruncDirPrefix } from 'aws-core-vscode/codewhisperer'
13-
import { tempDirPath, ToolkitError } from 'aws-core-vscode/shared'
14-
import { LspClient } from 'aws-core-vscode/amazonq'
15-
import { fs } from 'aws-core-vscode/shared'
1610
import path from 'path'
1711
import JSZip from 'jszip'
12+
import { getTestWorkspaceFolder } from '../../testInteg/integrationTestsUtilities'
13+
import { ZipUtil } from '../../codewhisperer/util/zipUtil'
14+
import { CodeAnalysisScope, codeScanTruncDirPrefix } from '../../codewhisperer/models/constants'
15+
import { ToolkitError } from '../../shared/errors'
16+
import { fs } from '../../shared/fs/fs'
17+
import { tempDirPath } from '../../shared/filesystemUtilities'
18+
import { CodeWhispererConstants } from '../../codewhisperer/indexNode'
19+
import { LspClient } from '../../amazonq/lsp/lspClient'
1820

1921
describe('zipUtil', function () {
2022
const workspaceFolder = getTestWorkspaceFolder()

0 commit comments

Comments
 (0)