  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
-
-import archiver from 'archiver'
 import { WritableStreamBuffer } from 'stream-buffers'
 import crypto from 'crypto'
-import { getLogger } from '../logger'
+import { readFileAsString } from '../filesystemUtilities'
+// Use require instead of import since this package doesn't support commonjs
+const { ZipWriter, TextReader } = require('@zip.js/zip.js')
+import { getLogger } from '../logger/logger'
 
 export interface ZipStreamResult {
     sizeInBytes: number
-    md5: string
+    hash: string
     streamBuffer: WritableStreamBuffer
 }
 
+export type ZipStreamProps = {
+    hashAlgorithm: 'md5' | 'sha256'
+    maxNumberOfFileStreams: number
+    compressionLevel: number
+}
+
+const defaultProps: ZipStreamProps = {
+    hashAlgorithm: 'sha256',
+    maxNumberOfFileStreams: 100,
+    compressionLevel: 1,
+}
+
 /**
  * Creates in-memory zip archives that output to a stream buffer.
  *
  * Example usage:
  * ```ts
- * const zipStream = new ZipStream()
+ * const zipStream = new ZipStream({
+ *     hashAlgorithm: 'sha256',
+ *     maxNumberOfFileStreams: 150,
+ *     compressionLevel: 1,
+ * })
  * zipStream.writeString('Hello World', 'file1.txt')
  * zipStream.writeFile('/path/to/some/file.txt', 'file2.txt')
- * const result = await zipStream.finalize()
- * console.log(result) // { sizeInBytes: ..., md5: ..., streamBuffer: ... }
+ * const result = await zipStream.finalize(onProgress) // onProgress is optional; called about once per second
+ * console.log(result) // { sizeInBytes: ..., hash: ..., streamBuffer: ... }
  * ```
  */
 export class ZipStream {
-    private _archive: archiver.Archiver
+    // TypeScript compiler is confused about using ZipWriter as a type
+    // @ts-ignore
+    private _zipWriter: ZipWriter<WritableStream>
     private _streamBuffer: WritableStreamBuffer
     private _hasher: crypto.Hash
+    private _numberOfFilesToStream: number = 0
+    private _numberOfFilesSucceeded: number = 0
+    private _filesToZip: [string, string][] = []
+    private _filesBeingZipped: number = 0
+    private _maxNumberOfFileStreams: number
+    boundFileCompletionCallback: (computedSize: number) => void
+    boundFileStartCallback: (totalBytes: number) => void
+
+    constructor(props: Partial<ZipStreamProps> = {}) {
+        // Allow any user-provided values to override default values
+        const mergedProps = { ...defaultProps, ...props }
+        const { hashAlgorithm, compressionLevel, maxNumberOfFileStreams } = mergedProps
+
+        this.boundFileCompletionCallback = this.onFinishedCompressingFile.bind(this)
+        this.boundFileStartCallback = this.onStartCompressingFile.bind(this)
+
+        this._zipWriter = new ZipWriter(
+            new WritableStream({
+                write: (chunk) => {
+                    this._streamBuffer.write(chunk)
+                    this._hasher.update(chunk)
+                },
+            }),
+            { level: compressionLevel }
+        )
+        this._maxNumberOfFileStreams = maxNumberOfFileStreams
 
-    constructor() {
-        this._archive = archiver('zip')
         this._streamBuffer = new WritableStreamBuffer()
-        this._archive.pipe(this._streamBuffer)
-        this._hasher = crypto.createHash('md5')
 
-        this._archive.on('data', (data) => {
-            this._hasher.update(data)
-        })
-        this._archive.on('error', (err) => {
-            throw err
-        })
-        this._archive.on('warning', (err) => {
-            getLogger().warn(err)
-        })
+        this._hasher = crypto.createHash(hashAlgorithm)
+    }
+
+    public onStartCompressingFile(totalBytes: number) {
+        this._filesBeingZipped++
+    }
+
+    public onFinishedCompressingFile(computedSize: number) {
+        this._numberOfFilesSucceeded++
+        this._filesBeingZipped--
+
+        // If files are still queued and a stream slot is now free,
+        // start compressing the next one
+        if (this._filesToZip.length > 0 && this._filesBeingZipped < this._maxNumberOfFileStreams) {
+            const [fileToZip, path] = this._filesToZip.shift()!
+            void readFileAsString(fileToZip).then((content) => {
+                return this._zipWriter.add(path, new TextReader(content), {
+                    onend: this.boundFileCompletionCallback,
+                    onstart: this.boundFileStartCallback,
+                })
+            })
+        }
     }
 
     public writeString(data: string, path: string) {
-        this._archive.append(Buffer.from(data, 'utf-8'), { name: path })
+        return this._zipWriter.add(path, new TextReader(data))
     }
 
     public writeFile(file: string, path: string) {
-        this._archive.file(file, { name: path })
+        // Track _numberOfFilesToStream so that finalize() does not return
+        // before the completion callback has fired for the last file.
+        // We can't rely on progress.entries.total, because files can be
+        // queued faster than the progress event fires.
+        this._numberOfFilesToStream++
+        // Only start compressing another file if we're under our limit
+        // of concurrent file streams
+        if (this._filesBeingZipped < this._maxNumberOfFileStreams) {
+            void readFileAsString(file).then((content) => {
+                return this._zipWriter.add(path, new TextReader(content), {
+                    onend: this.boundFileCompletionCallback,
+                    onstart: this.boundFileStartCallback,
+                })
+            })
+        } else {
+            // Queue it for later; onFinishedCompressingFile picks it up
+            // once a slot frees
+            this._filesToZip.push([file, path])
+        }
     }
 
 
-    public finalize(): Promise<ZipStreamResult> {
-        return new Promise((resolve, reject) => {
-            void this._archive.finalize()
-            this._archive.on('finish', () => {
-                resolve({
-                    sizeInBytes: this._archive.pointer(),
-                    md5: this._hasher.digest('base64'),
-                    streamBuffer: this._streamBuffer,
-                })
+    public async finalize(onProgress?: (percentComplete: number) => void): Promise<ZipStreamResult> {
+        let finished = false
+        // Poll until every queued file stream has been fully processed;
+        // this is tracked via the per-entry completion callbacks
+        while (!finished) {
+            finished = await new Promise((resolve) => {
+                setTimeout(() => {
+                    getLogger().verbose(`zipped ${this._numberOfFilesSucceeded} of ${this._numberOfFilesToStream} files`)
+                    // Math.max guards against division by zero when no files were queued
+                    onProgress?.(Math.floor((100 * this._numberOfFilesSucceeded) / Math.max(1, this._numberOfFilesToStream)))
+                    resolve(this._numberOfFilesToStream <= this._numberOfFilesSucceeded)
+                }, 1000)
             })
-        })
+        }
+        // We're done streaming all files, so we can close the zip stream
+        await this._zipWriter.close()
+        return {
+            sizeInBytes: this._streamBuffer.size(),
+            hash: this._hasher.digest('base64'),
+            streamBuffer: this._streamBuffer,
+        }
     }
 }
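Note: the `writeFile`/`onFinishedCompressingFile` pair above amounts to a bounded-concurrency task queue: at most `maxNumberOfFileStreams` files are read and compressed at once, and each finished entry pulls the next queued file. A minimal standalone sketch of that pattern (names here are illustrative, not part of the commit):

```ts
// Sketch of the bounded-concurrency pattern used by ZipStream.
// All names are hypothetical; only the shape matches the commit.
class BoundedTaskQueue {
    private readonly pending: Array<() => Promise<void>> = []
    private running = 0

    constructor(private readonly maxConcurrent: number) {}

    // Run the task now if a slot is free, otherwise queue it
    add(task: () => Promise<void>): void {
        if (this.running < this.maxConcurrent) {
            this.run(task)
        } else {
            this.pending.push(task)
        }
    }

    private run(task: () => Promise<void>): void {
        this.running++
        void task().finally(() => {
            // A slot freed up: pull the next queued task, the way
            // onFinishedCompressingFile drains _filesToZip
            this.running--
            const next = this.pending.shift()
            if (next) {
                this.run(next)
            }
        })
    }
}
```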
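The constructor's `WritableStream` sink is also worth calling out: every chunk the `ZipWriter` emits is written to the in-memory buffer and folded into the hash in the same pass, so computing the checksum needs no second read of the archive. A self-contained sketch of that single-pass tee, assuming Node 18+ where `WritableStream` is a global:

```ts
import crypto from 'crypto'
import { WritableStreamBuffer } from 'stream-buffers'

// Single-pass tee: each chunk goes to the in-memory buffer and
// into the running hash at the same time
const hasher = crypto.createHash('sha256')
const streamBuffer = new WritableStreamBuffer()
const sink = new WritableStream({
    write: (chunk: Uint8Array) => {
        streamBuffer.write(Buffer.from(chunk))
        hasher.update(chunk)
    },
})
```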
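Finally, a usage sketch mirroring the updated docstring, showing the new `finalize` progress callback (the `zipFiles` wrapper is hypothetical):

```ts
import { ZipStream } from './zipStream'

// Hypothetical caller: zip a list of files and log progress
async function zipFiles(files: string[]) {
    const zipStream = new ZipStream({ maxNumberOfFileStreams: 150 })
    for (const file of files) {
        zipStream.writeFile(file, file)
    }
    const result = await zipStream.finalize((percentComplete) => {
        console.log(`zipping: ${percentComplete}%`)
    })
    console.log(`zipped ${result.sizeInBytes} bytes, hash=${result.hash}`)
}
```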