Skip to content

Commit 813b154

Browse files
committed
For issue #48, add Gunzipper that relies on DecompressionStream('gzip').
1 parent d01610a commit 813b154

File tree

4 files changed

+179
-1
lines changed

4 files changed

+179
-1
lines changed

archive/decompress.js

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,31 @@ export class Untarrer extends Unarchiver {
319319
getScriptFileName() { return './untar.js'; };
320320
}
321321

322+
/**
323+
* IMPORTANT NOTES for Gunzipper:
324+
* 1) A Gunzipper will only ever emit one EXTRACT event, because a gzipped file only ever contains
325+
* a single file.
326+
* 2) If the gzipped file does not include the original filename as a FNAME block, then the
327+
* UnarchivedFile in the UnarchiveExtractEvent will not include a filename. It will be up to the
328+
* client to re-assemble the filename (if needed).
329+
* 3) update() is not supported on a Gunzipper, since the current implementation relies on runtime
330+
* support for DecompressionStream('gzip') which can throw hard-to-detect errors reading only
331+
* part of a file.
332+
* 4) PROGRESS events are not yet supported in Gunzipper.
333+
*/
334+
export class Gunzipper extends Unarchiver {
  /**
   * Builds a Gunzipper over the given buffer; all work is delegated to the Unarchiver base class.
   * @param {ArrayBuffer} ab
   * @param {UnarchiverOptions} options
   */
  constructor(ab, options = {}) {
    super(ab, options);
  }

  /** @returns {string} The MIME type this unarchiver reports. */
  getMIMEType() {
    return 'application/gzip';
  }

  /** @returns {string} The script file name this unarchiver reports for its implementation. */
  getScriptFileName() {
    return './gunzip.js';
  }
}
346+
322347
// TODO(2.0): When up-revving to a major new version, remove the string type for options.
323348

324349
/**
@@ -344,6 +369,8 @@ export function getUnarchiver(ab, options = {}) {
344369
unarchiver = new Unrarrer(ab, options);
345370
} else if (mimeType === 'application/zip') { // PK (Zip)
346371
unarchiver = new Unzipper(ab, options);
372+
} else if (mimeType === 'application/gzip') { // GZIP
373+
unarchiver = new Gunzipper(ab, options);
347374
} else { // Try with tar
348375
unarchiver = new Untarrer(ab, options);
349376
}

archive/gunzip.js

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/**
2+
* gunzip.js
3+
*
4+
* Licensed under the MIT License
5+
*
6+
* Copyright(c) 2024 Google Inc.
7+
*
8+
* Reference Documentation:
9+
*
10+
* https://www.ietf.org/rfc/rfc1952.txt
11+
*/
12+
13+
import { BitStream } from '../io/bitstream.js';
14+
import { ByteStream } from '../io/bytestream.js';
15+
16+
/**
 * Port used to post messages back to the host. Unset until connect() assigns it.
 * @type {MessagePort}
 */
let hostPort;

/**
 * Stream over the gzipped bytes. Remains null until the first message creates it.
 * @type {ByteStream}
 */
let bstream = null;

/**
 * Filename taken from the gzip header. Stays undefined unless a FNAME block is present.
 */
let filename;

/**
 * Posts an 'error' message carrying the given text to the host port.
 * @param {string} str
 */
const err = (str) => {
  const message = { type: 'error', msg: str };
  return hostPort.postMessage(message);
};
25+
26+
/**
 * Counts the bytes that precede the next zero byte in the stream, without consuming any of them.
 * @param {ByteStream} stream
 * @returns {number} Length of the zero-terminated field, excluding the terminator itself.
 */
function countBytesBeforeNull(stream) {
  const scanner = stream.tee();
  let numBytes = 0;
  while (scanner.readNumber(1) !== 0) numBytes++;
  return numBytes;
}

/**
 * Validates and walks the gzip member header (RFC 1952) at the front of bstream, recording the
 * original filename when a FNAME field is present, and then inflates the file with the runtime's
 * DecompressionStream('gzip').
 *
 * On success, posts one 'extract' message followed by a 'finish' message to the host. On any
 * failure (bad signature, unsupported compression method, truncated header, decompression error)
 * posts a single 'error' message instead and does not post 'finish'.
 * @returns {Promise<void>}
 */
async function gunzip() {
  // Bit masks for the FLG header byte (RFC 1952, section 2.3.1). Bit 0 (FTEXT) is not needed.
  const FHCRC = 0x02;
  const FEXTRA = 0x04;
  const FNAME = 0x08;
  const FCOMMENT = 0x10;

  try {
    const sig = bstream.readBytes(2);
    if (sig[0] !== 0x1F || sig[1] !== 0x8B) {
      err(`First two bytes not 0x1F, 0x8B: ${sig[0].toString(16)} ${sig[1].toString(16)}`);
      return;
    }

    const compressionMethod = bstream.readNumber(1);
    if (compressionMethod !== 8) {
      err(`Compression method ${compressionMethod} not supported`);
      return;
    }

    // Parse the rest of the GZIP header to see if we can find a filename (FNAME block).
    const flags = bstream.readNumber(1);

    bstream.skip(4); // MTIME
    bstream.skip(1); // XFL
    bstream.skip(1); // OS

    if (flags & FEXTRA) {
      const xlen = bstream.readNumber(2);
      if (xlen > 0) {
        bstream.skip(xlen);
      }
    }

    if (flags & FNAME) {
      filename = bstream.readString(countBytesBeforeNull(bstream));
      // Consume the zero terminator too, so that the next header field starts at the right byte.
      bstream.skip(1);
    }

    if (flags & FCOMMENT) {
      // Skip the comment text and its zero terminator.
      bstream.skip(countBytesBeforeNull(bstream) + 1);
    }

    if (flags & FHCRC) {
      bstream.skip(2); // CRC16
    }

    // Now try to use native implementation of INFLATE, if supported by the runtime.
    // The native decoder is handed the complete buffer (header included), not just the bytes
    // after the current read position.
    const blob = new Blob([bstream.bytes.buffer]);
    const decompressedStream = blob.stream().pipeThrough(new DecompressionStream('gzip'));
    const fileData = new Uint8Array(await new Response(decompressedStream).arrayBuffer());
    const unarchivedFile = { filename, fileData };
    hostPort.postMessage({ type: 'extract', unarchivedFile }, [fileData.buffer]);
  } catch (e) {
    // Report the failure to the host rather than letting it become an unhandled rejection.
    err(`Gunzip failed: ${e instanceof Error ? e.message : e}`);
    return;
  }

  // TODO: Support chunked decompression?
  // TODO: Fall through to non-native implementation via inflate() ?

  hostPort.postMessage({ type: 'finish', metadata: {} });
}
89+
90+
/**
 * Handles a message from the host. event.data.file has the first (and only) ArrayBuffer: the
 * first message creates the little-endian ByteStream and runs gunzip(); any later message is
 * rejected, because this implementation needs the whole file up front.
 * @param {MessageEvent} event
 */
const onmessage = async function (event) {
  const bytes = event.data.file;

  if (bstream) {
    throw `Gunzipper does not support calling update() with more bytes. Send the whole file with start().`;
  }

  bstream = new ByteStream(bytes);
  bstream.setLittleEndian(true);

  await gunzip();
};
103+
104+
/**
 * Attaches the gunzip implementation to its host: remembers the given MessagePort and installs
 * the message handler on it. Throws if a port has already been connected.
 * @param {MessagePort} port
 */
export function connect(port) {
  const alreadyConnected = Boolean(hostPort);
  if (alreadyConnected) {
    throw `connect(): hostPort already connected in gunzip.js`;
  }

  hostPort = port;
  port.onmessage = onmessage;
}
116+
117+
/**
 * Detaches the gunzip implementation from its host and clears all module-level state (port,
 * byte stream and filename). Throws if no port is currently connected.
 */
export function disconnect() {
  const isConnected = Boolean(hostPort);
  if (!isConnected) {
    throw `disconnect(): hostPort was not connected in gunzip.js`;
  }

  // Reset everything back to its initial, unconnected value.
  filename = undefined;
  bstream = null;
  hostPort = null;
}

tests/archive-decompress.spec.js

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import * as fs from 'node:fs';
22
import 'mocha';
33
import { expect } from 'chai';
44

5-
import { Unarchiver, Unrarrer, Untarrer, Unzipper, getUnarchiver } from '../archive/decompress.js';
5+
import { Gunzipper, Unarchiver, getUnarchiver } from '../archive/decompress.js';
66

77
const PATH = `tests/archive-testfiles/`;
88

@@ -69,4 +69,30 @@ describe('bitjs.archive.decompress', () => {
6969
expect(extractEvtFiredForOnExtract).equals(true);
7070
});
7171
}
72+
73+
describe('gunzip', () => {
  it('can unzip a file', async () => {
    const bufs = new Map(inputArrayBuffers);
    const nodeBuf = fs.readFileSync(`${PATH}sample-1-slowest.txt.gz`);
    const ab = nodeBuf.buffer.slice(nodeBuf.byteOffset, nodeBuf.byteOffset + nodeBuf.length);
    const gunzipper = getUnarchiver(ab, {debug: true});
    expect(gunzipper instanceof Gunzipper).equals(true);

    // Only capture the extracted file inside the callback; the assertions run after start()
    // resolves so that a failure is reported by the test itself.
    let extractEvtFiredForOnExtract = false;
    let unarchivedFile = null;
    gunzipper.onExtract(evt => {
      extractEvtFiredForOnExtract = true;
      unarchivedFile = evt.unarchivedFile;
    });

    await gunzipper.start();
    expect(extractEvtFiredForOnExtract).equals(true);

    const {filename, fileData} = unarchivedFile;
    expect(filename).equals('sample-1.txt');

    // Wrap the expected bytes in a Uint8Array so that indexing yields byte values whether the
    // map holds ArrayBuffers or typed arrays.
    const expected = new Uint8Array(bufs.get('sample-1.txt'));
    expect(fileData.byteLength).equals(expected.byteLength);
    for (let b = 0; b < fileData.byteLength; ++b) {
      expect(fileData[b]).equals(expected[b]);
    }
  });
});
7298
});
176 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)