|
2 | 2 | * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
3 | 3 | * SPDX-License-Identifier: Apache-2.0 |
4 | 4 | */ |
5 | | - |
6 | | -import archiver from 'archiver' |
7 | 5 | import { WritableStreamBuffer } from 'stream-buffers' |
8 | 6 | import crypto from 'crypto' |
9 | | -import { getLogger } from '../logger' |
| 7 | +import { readFileAsString } from '../filesystemUtilities' |
| 8 | +// Use require instead of import since this package doesn't support commonjs |
| 9 | +const { ZipWriter, TextReader } = require('@zip.js/zip.js') |
| 10 | +import { getLogger } from '../logger/logger' |
10 | 11 |
|
/**
 * Result of finalizing a ZipStream: the finished archive plus integrity
 * metadata computed while the archive bytes were streamed.
 */
export interface ZipStreamResult {
    /** Total size of the finished zip archive, in bytes. */
    sizeInBytes: number
    /** Base64-encoded digest of the archive bytes (algorithm chosen at construction). */
    hash: string
    /** In-memory buffer holding the complete zip archive. */
    streamBuffer: WritableStreamBuffer
}
16 | 17 |
|
/** Configuration options for {@link ZipStream}. */
export type ZipStreamProps = {
    /** Digest algorithm used to hash the archive bytes as they are written. */
    hashAlgorithm: 'md5' | 'sha256'
    /** Maximum number of files read/compressed concurrently; excess files are queued. */
    maxNumberOfFileStreams: number
    /** Compression level forwarded to the zip writer (zip.js `level` option). */
    compressionLevel: number
}

/** Defaults applied for any options the caller omits. */
const defaultProps: ZipStreamProps = {
    hashAlgorithm: 'sha256',
    maxNumberOfFileStreams: 100,
    compressionLevel: 1,
}
| 29 | + |
/**
 * Creates in-memory zip archives that output to a stream buffer.
 *
 * Example usage:
 * ```ts
 * const zipStream = new ZipStream({
 *     hashAlgorithm: 'sha256',
 *     maxNumberOfFileStreams: 150,
 *     compressionLevel: 1,
 * })
 * zipStream.writeString('Hello World', 'file1.txt')
 * zipStream.writeFile('/path/to/some/file.txt', 'file2.txt')
 * // optional onProgress handler is called at most once per second
 * const result = await zipStream.finalize(onProgress)
 * console.log(result) // { sizeInBytes: ..., hash: ..., streamBuffer: ... }
 * ```
 */
29 | 47 | export class ZipStream { |
30 | | - private _archive: archiver.Archiver |
| 48 | + // TypeScript compiler is confused about using ZipWriter as a type |
| 49 | + // @ts-ignore |
| 50 | + private _zipWriter: ZipWriter<WritableStream> |
31 | 51 | private _streamBuffer: WritableStreamBuffer |
32 | 52 | private _hasher: crypto.Hash |
| 53 | + private _numberOfFilesToStream: number = 0 |
| 54 | + private _numberOfFilesSucceeded: number = 0 |
| 55 | + private _filesToZip: [string, string][] = [] |
| 56 | + private _filesBeingZipped: number = 0 |
| 57 | + private _maxNumberOfFileStreams: number |
| 58 | + boundFileCompletionCallback: (progress: number, total: number) => void |
| 59 | + boundFileStartCallback: (totalBytes: number) => void |
| 60 | + |
| 61 | + constructor(props: Partial<ZipStreamProps> = {}) { |
| 62 | + // Allow any user-provided values to override default values |
| 63 | + const mergedProps = { ...defaultProps, ...props } |
| 64 | + const { hashAlgorithm, compressionLevel, maxNumberOfFileStreams } = mergedProps |
| 65 | + |
| 66 | + this.boundFileCompletionCallback = this.onFinishedCompressingFile.bind(this) |
| 67 | + this.boundFileStartCallback = this.onStartCompressingFile.bind(this) |
| 68 | + |
| 69 | + this._zipWriter = new ZipWriter( |
| 70 | + new WritableStream({ |
| 71 | + write: (chunk) => { |
| 72 | + this._streamBuffer.write(chunk) |
| 73 | + this._hasher.update(chunk) |
| 74 | + }, |
| 75 | + }), |
| 76 | + { level: compressionLevel } |
| 77 | + ) |
| 78 | + this._maxNumberOfFileStreams = maxNumberOfFileStreams |
33 | 79 |
|
34 | | - constructor() { |
35 | | - this._archive = archiver('zip') |
36 | 80 | this._streamBuffer = new WritableStreamBuffer() |
37 | | - this._archive.pipe(this._streamBuffer) |
38 | | - this._hasher = crypto.createHash('md5') |
39 | 81 |
|
40 | | - this._archive.on('data', (data) => { |
41 | | - this._hasher.update(data) |
42 | | - }) |
43 | | - this._archive.on('error', (err) => { |
44 | | - throw err |
45 | | - }) |
46 | | - this._archive.on('warning', (err) => { |
47 | | - getLogger().warn(err) |
48 | | - }) |
| 82 | + this._hasher = crypto.createHash(hashAlgorithm) |
| 83 | + } |
| 84 | + |
| 85 | + public onStartCompressingFile(totalBytes: number) { |
| 86 | + this._filesBeingZipped++ |
| 87 | + } |
| 88 | + |
| 89 | + public onFinishedCompressingFile(computedsize: number) { |
| 90 | + this._numberOfFilesSucceeded++ |
| 91 | + this._filesBeingZipped-- |
| 92 | + |
| 93 | + if (this._filesToZip.length > 0 && this._filesBeingZipped < this._maxNumberOfFileStreams) { |
| 94 | + const [fileToZip, path] = this._filesToZip.shift()! |
| 95 | + void readFileAsString(fileToZip).then((content) => { |
| 96 | + return this._zipWriter.add(path, new TextReader(content), { |
| 97 | + onend: this.boundFileCompletionCallback, |
| 98 | + onstart: this.boundFileStartCallback, |
| 99 | + }) |
| 100 | + }) |
| 101 | + } |
49 | 102 | } |
50 | 103 |
|
51 | 104 | public writeString(data: string, path: string) { |
52 | | - this._archive.append(Buffer.from(data, 'utf-8'), { name: path }) |
| 105 | + return this._zipWriter.add(path, new TextReader(data)) |
53 | 106 | } |
54 | 107 |
|
55 | 108 | public writeFile(file: string, path: string) { |
56 | | - this._archive.file(file, { name: path }) |
| 109 | + // We use _numberOfFilesToStream to make sure we don't finalize too soon |
| 110 | + // (before the progress event has been fired for the last file) |
| 111 | + // The problem is that we can't rely on progress.entries.total, |
| 112 | + // because files can be added to the queue faster |
| 113 | + // than the progress event is fired |
| 114 | + this._numberOfFilesToStream++ |
| 115 | + // console.log('this._numberOfFilesToStream is now', this._numberOfFilesToStream) |
| 116 | + // We only start zipping another file if we're under our limit |
| 117 | + // of concurrent file streams |
| 118 | + if (this._filesBeingZipped < this._maxNumberOfFileStreams) { |
| 119 | + void readFileAsString(file).then((content) => { |
| 120 | + return this._zipWriter.add(path, new TextReader(content), { |
| 121 | + onend: this.boundFileCompletionCallback, |
| 122 | + onstart: this.boundFileStartCallback, |
| 123 | + }) |
| 124 | + }) |
| 125 | + } else { |
| 126 | + // Queue it for later (see "write" event) |
| 127 | + this._filesToZip.push([file, path]) |
| 128 | + } |
57 | 129 | } |
58 | 130 |
|
59 | | - public finalize(): Promise<ZipStreamResult> { |
60 | | - return new Promise((resolve, reject) => { |
61 | | - void this._archive.finalize() |
62 | | - this._archive.on('finish', () => { |
63 | | - resolve({ |
64 | | - sizeInBytes: this._archive.pointer(), |
65 | | - md5: this._hasher.digest('base64'), |
66 | | - streamBuffer: this._streamBuffer, |
67 | | - }) |
| 131 | + public async finalize(onProgress?: (percentComplete: number) => void): Promise<ZipStreamResult> { |
| 132 | + let finished = false |
| 133 | + // We need to poll to check for all the file streams to be completely processed |
| 134 | + // -- we are keeping track of this via the "progress" event handler |
| 135 | + while (!finished) { |
| 136 | + finished = await new Promise((resolve) => { |
| 137 | + setTimeout(() => { |
| 138 | + getLogger().verbose('success is', this._numberOfFilesSucceeded, '/', this._numberOfFilesToStream) |
| 139 | + onProgress?.(Math.floor((100 * this._numberOfFilesSucceeded) / this._numberOfFilesToStream)) |
| 140 | + resolve(this._numberOfFilesToStream <= this._numberOfFilesSucceeded) |
| 141 | + }, 1000) |
68 | 142 | }) |
69 | | - }) |
| 143 | + } |
| 144 | + // We're done streaming all files, so we can close the zip stream |
| 145 | + |
| 146 | + await this._zipWriter.close() |
| 147 | + return { |
| 148 | + sizeInBytes: this._streamBuffer.size(), |
| 149 | + hash: this._hasher.digest('base64'), |
| 150 | + streamBuffer: this._streamBuffer, |
| 151 | + } |
70 | 152 | } |
71 | 153 | } |
0 commit comments