Skip to content

Commit 9a2f3b7

Browse files
committed
Split out gzip huffman stuff
1 parent 44f57fd commit 9a2f3b7

File tree

2 files changed

+127
-117
lines changed

2 files changed

+127
-117
lines changed

src/gzip.huffman.js

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// Adapted from https://github.com/101arrowz/fflate Copyright (c) 2023 Arjun Barrett
2+
// https://tools.ietf.org/html/rfc1951
3+
4+
// Number of extra bits that follow each length code (RFC 1951, section 3.2.5).
// Entry i belongs to length symbol 257 + i.
export const fixedLengthExtraBits = new Uint8Array([
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1,
  2, 2, 2, 2,
  3, 3, 3, 3,
  4, 4, 4, 4,
  5, 5, 5, 5,
  0,
  /* unused */ 0, 0,
  /* impossible */ 0,
])

// Number of extra bits that follow each distance code.
// Entry i belongs to distance symbol i.
export const fixedDistanceExtraBits = new Uint8Array([
  0, 0, 0, 0,
  1, 1, 2, 2, 3, 3, 4, 4,
  5, 5, 6, 6, 7, 7, 8, 8,
  9, 9, 10, 10, 11, 11, 12, 12,
  13, 13,
  /* unused */ 0, 0,
])
11+
12+
/**
 * Build the base-value table for a family of codes from their extra-bit
 * counts, together with the reverse lookup from value to code.
 *
 * base[0] is `start + 1`, and base[i] = base[i - 1] + 2 ** eb[i - 1] for
 * i = 1..30, so each code covers as many consecutive values as its extra
 * bits can express.
 *
 * rev has base[30] entries. For every code index i in 1..29 and every value
 * j in [base[i], base[i + 1]) it stores ((j - base[i]) << 5) | i, i.e. the
 * offset inside the code's range packed above the 5-bit code index. Values
 * below base[1] are left at 0.
 *
 * @param {Uint8Array} eb extra-bit count per code index (at least 30 entries are read)
 * @param {number} start value immediately before the first base value
 * @returns {{base: Uint16Array, rev: Int32Array}}
 */
function freb(eb, start) {
  const base = new Uint16Array(31)
  // no previous code contributes to the first entry, so it is simply start + 1
  let next = start + 1
  base[0] = next
  for (let i = 1; i < 31; i++) {
    next += 1 << eb[i - 1]
    base[i] = next
  }
  // numbers here are max 18 bits
  const rev = new Int32Array(base[30])
  for (let i = 1; i < 30; i++) {
    const first = base[i]
    const end = base[i + 1]
    for (let j = first; j < end; ++j) {
      rev[j] = ((j - first) << 5) | i
    }
  }
  return { base, rev }
}
32+
33+
// Base match length per length code (index = length symbol - 257) and its
// reverse lookup. revfl is only written here; nothing in this module reads it.
const { base: fixedLength, rev: revfl } = freb(fixedLengthExtraBits, 2)
// freb yields 259 for code 28, but RFC 1951 gives symbol 285 the length 258
// with no extra bits, so patch both tables. The entries after 28 are also
// off, which is harmless because those codes never occur.
fixedLength[28] = 258
revfl[258] = 28
// Base distance per distance code
const { base: fixedDistance } = freb(fixedDistanceExtraBits, 0)
38+
39+
// Lookup table: rev[i] is i with its 15 bits in reverse order.
const rev = new Uint16Array(32768)
for (let i = 0; i < 32768; i++) {
  // Swap progressively larger groups (1, 2, 4, then 8 bits) to reverse a
  // 16-bit value.
  let x = ((i & 0xAAAA) >> 1) | ((i & 0x5555) << 1)
  x = ((x & 0xCCCC) >> 2) | ((x & 0x3333) << 2)
  x = ((x & 0xF0F0) >> 4) | ((x & 0x0F0F) << 4)
  x = ((x & 0xFF00) >> 8) | ((x & 0x00FF) << 8)
  // i < 2^15, so the 16-bit reversal has a zero low bit; dropping it leaves
  // the 15-bit reversal
  rev[i] = x >> 1
}
48+
49+
/**
 * Build a Huffman lookup table from per-symbol code lengths.
 *
 * Codes are assigned in symbol order within each length, shorter lengths
 * first. A length of 0 means the symbol has no code.
 *
 * With `r` truthy the result is a decode table with 1 << maxBits entries,
 * indexed by the bit-reversed code followed by every combination of the
 * remaining bits. Each entry holds (symbol << 4) | codeLength, or 0 when no
 * code maps to that index.
 *
 * With `r` falsy the result has one entry per symbol holding that symbol's
 * bit-reversed code (0 for symbols without a code).
 *
 * maxBits must be at most 15. The count tables have maxBits entries, so it
 * should also be at least the largest length in `cd`.
 *
 * @param {Uint8Array} cd code length for each symbol index
 * @param {number} maxBits
 * @param {0 | 1} r 1 for a decode table, 0 for per-symbol codes
 * @returns {Uint16Array}
 */
export function huffMap(cd, maxBits, r) {
  const symbolCount = cd.length
  // u16 "map": index -> # of codes with bit length = index + 1
  const l = new Uint16Array(maxBits)
  for (let i = 0; i < symbolCount; i++) {
    const len = cd[i]
    if (len) ++l[len - 1]
  }
  // u16 "map": index -> next unassigned code for bit length = index + 1
  const le = new Uint16Array(maxBits)
  for (let i = 1; i < maxBits; i++) {
    le[i] = (le[i - 1] + l[i - 1]) << 1
  }
  let co
  if (r) {
    // u16 "map": index -> number of actual bits, symbol for code
    co = new Uint16Array(1 << maxBits)
    // rev holds 15-bit reversals; shift off the bits beyond maxBits
    const rvb = 15 - maxBits
    for (let i = 0; i < symbolCount; i++) {
      const len = cd[i]
      // ignore 0 lengths
      if (!len) continue
      // num encoding both symbol and bits read
      const sv = (i << 4) | len
      const freeBits = maxBits - len
      const firstValue = le[len - 1]++ << freeBits
      const lastValue = firstValue | ((1 << freeBits) - 1)
      // every maxBits-wide value starting with the code yields the same result
      for (let value = firstValue; value <= lastValue; value++) {
        co[rev[value] >> rvb] = sv
      }
    }
  } else {
    co = new Uint16Array(symbolCount)
    for (let i = 0; i < symbolCount; i++) {
      const len = cd[i]
      if (len) {
        co[i] = rev[le[len - 1]++] >> (15 - len)
      }
    }
  }
  return co
}
98+
99+
// Code lengths of the fixed literal/length alphabet, by symbol range:
// 0-143 -> 8 bits, 144-255 -> 9 bits, 256-279 -> 7 bits, 280-287 -> 8 bits
const fixedLengthTree = new Uint8Array(288)
fixedLengthTree.fill(8, 0, 144)
fixedLengthTree.fill(9, 144, 256)
fixedLengthTree.fill(7, 256, 280)
fixedLengthTree.fill(8, 280, 288)
// All 32 fixed distance codes are 5 bits long
const fixedDistanceTree = new Uint8Array(32).fill(5)

// Decode tables for fixed-Huffman blocks (9-bit and 5-bit lookups)
export const fixedLengthMap = /*#__PURE__*/ huffMap(fixedLengthTree, 9, 1)
export const fixedDistanceMap = /*#__PURE__*/ huffMap(fixedDistanceTree, 5, 1)
110+
export { fixedLength, fixedDistance }

src/gzip.js

Lines changed: 17 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1,108 +1,9 @@
11
// Adapted from https://github.com/101arrowz/fflate Copyright (c) 2023 Arjun Barrett
22
// https://tools.ietf.org/html/rfc1951
33

4-
// fixed length extra bits
5-
const fixedLengthExtraBits = new Uint8Array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, /* unused */ 0, 0, /* impossible */ 0])
6-
const fixedDistanceExtraBits = new Uint8Array([0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, /* unused */ 0, 0])
7-
const codeLengthIndexMap = new Uint8Array([16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15])
8-
9-
/**
10-
* get base, reverse index map from extra bits
11-
* @param {Uint8Array} eb
12-
* @param {number} start
13-
* @returns {{base: Uint16Array, rev: Int32Array}}
14-
*/
15-
function freb(eb, start) {
16-
const base = new Uint16Array(31)
17-
for (let i = 0; i < 31; i++) {
18-
base[i] = start += 1 << eb[i - 1]
19-
}
20-
// numbers here are max 18 bits
21-
const rev = new Int32Array(base[30])
22-
for (let i = 1; i < 30; i++) {
23-
for (let j = base[i]; j < base[i + 1]; ++j) {
24-
rev[j] = j - base[i] << 5 | i
25-
}
26-
}
27-
return { base, rev }
28-
}
29-
30-
const { base: fl, rev: revfl } = freb(fixedLengthExtraBits, 2)
31-
// we can ignore the fact that the other numbers are wrong; they never happen anyway
32-
fl[28] = 258
33-
revfl[258] = 28
34-
const { base: fd } = freb(fixedDistanceExtraBits, 0)
35-
36-
// map of value to reverse (assuming 16 bits)
37-
const rev = new Uint16Array(32768)
38-
for (let i = 0; i < 32768; i++) {
39-
// reverse table algorithm from SO
40-
let x = (i & 0xAAAA) >> 1 | (i & 0x5555) << 1
41-
x = (x & 0xCCCC) >> 2 | (x & 0x3333) << 2
42-
x = (x & 0xF0F0) >> 4 | (x & 0x0F0F) << 4
43-
rev[i] = ((x & 0xFF00) >> 8 | (x & 0x00FF) << 8) >> 1
44-
}
45-
46-
/**
47-
* create huffman tree from Uint8Array "map": index -> code length for code index
48-
* maxBits must be at most 15
49-
* @param {Uint8Array} cd
50-
* @param {number} maxBits
51-
* @param {0 | 1} r
52-
* @returns {Uint16Array}
53-
*/
54-
function huffMap(cd, maxBits, r) {
55-
// u16 "map": index -> # of codes with bit length = index
56-
const l = new Uint16Array(maxBits)
57-
// length of cd must be 288 (total # of codes)
58-
for (let i = 0; i < cd.length; i++) {
59-
if (cd[i]) ++l[cd[i] - 1]
60-
}
61-
// u16 "map": index -> minimum code for bit length = index
62-
const le = new Uint16Array(maxBits)
63-
for (let i = 1; i < maxBits; i++) {
64-
le[i] = le[i - 1] + l[i - 1] << 1
65-
}
66-
let co
67-
if (r) {
68-
// u16 "map": index -> number of actual bits, symbol for code
69-
co = new Uint16Array(1 << maxBits)
70-
// bits to remove for reverser
71-
const rvb = 15 - maxBits
72-
for (let i = 0; i < cd.length; i++) {
73-
// ignore 0 lengths
74-
if (cd[i]) {
75-
// num encoding both symbol and bits read
76-
const sv = i << 4 | cd[i]
77-
const freeBits = maxBits - cd[i]
78-
let startValue = le[cd[i] - 1]++ << freeBits
79-
for (const endValue = startValue | (1 << freeBits) - 1; startValue <= endValue; startValue++) {
80-
// every 16 bit value starting with the code yields the same result
81-
co[rev[startValue] >> rvb] = sv
82-
}
83-
}
84-
}
85-
} else {
86-
co = new Uint16Array(cd.length)
87-
for (let i = 0; i < cd.length; i++) {
88-
if (cd[i]) {
89-
co[i] = rev[le[cd[i] - 1]++] >> 15 - cd[i]
90-
}
91-
}
92-
}
93-
return co
94-
}
4+
import { fixedDistance, fixedDistanceExtraBits, fixedDistanceMap, fixedLength, fixedLengthExtraBits, fixedLengthMap, huffMap } from './gzip.huffman.js'
955

96-
// construct huffman trees
97-
const fixedLengthTree = new Uint8Array(288)
98-
for (let i = 0; i < 144; i++) fixedLengthTree[i] = 8
99-
for (let i = 144; i < 256; i++) fixedLengthTree[i] = 9
100-
for (let i = 256; i < 280; i++) fixedLengthTree[i] = 7
101-
for (let i = 280; i < 288; i++) fixedLengthTree[i] = 8
102-
const fixedDistanceTree = new Uint8Array(32)
103-
for (let i = 0; i < 32; i++) fixedDistanceTree[i] = 5
104-
const fixedLengthMap = /*#__PURE__*/ huffMap(fixedLengthTree, 9, 1)
105-
const fixedDistanceMap = /*#__PURE__*/ huffMap(fixedDistanceTree, 5, 1)
6+
// Permutation of the 19 code-length symbols; the values match the order listed
// in RFC 1951 section 3.2.7 for the code length alphabet. Its use is outside
// the hunks shown here.
const codeLengthIndexMap = Uint8Array.of(
  16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
)
1067

1078
/**
1089
* find max of array
@@ -180,11 +81,11 @@ export function gunzip(input, out, inputIndex = 0, outputIndex = 0) {
18081
let final = 0 // last chunk?
18182
let lengthBits = 0
18283
let distBits = 0
183-
let lmap
184-
let dmap
84+
let lengthMap
85+
let distMap
18586
const totalBits = input.length * 8
18687
do {
187-
if (!lmap) {
88+
if (!lengthMap) {
18889
// final chunk is next?
18990
final = bits(input, pos, 1)
19091
const type = bits(input, pos + 1, 3)
@@ -203,8 +104,8 @@ export function gunzip(input, out, inputIndex = 0, outputIndex = 0) {
203104
continue
204105
} else if (type === 1) {
205106
// fixed huffman
206-
lmap = fixedLengthMap
207-
dmap = fixedDistanceMap
107+
lengthMap = fixedLengthMap
108+
distMap = fixedDistanceMap
208109
lengthBits = 9
209110
distBits = 5
210111
} else if (type === 2) {
@@ -251,12 +152,11 @@ export function gunzip(input, out, inputIndex = 0, outputIndex = 0) {
251152
}
252153
const lengthTree = lengthDistanceTree.subarray(0, hLiteral)
253154
const distanceTree = lengthDistanceTree.subarray(hLiteral)
254-
// max length bits
155+
// max length/dist bits
255156
lengthBits = max(lengthTree)
256-
// max dist bits
257157
distBits = max(distanceTree)
258-
lmap = huffMap(lengthTree, lengthBits, 1)
259-
dmap = huffMap(distanceTree, distBits, 1)
158+
lengthMap = huffMap(lengthTree, lengthBits, 1)
159+
distMap = huffMap(distanceTree, distBits, 1)
260160
} else throw new Error('invalid block type')
261161
if (pos > totalBits) throw new Error('unexpected EOF')
262162
}
@@ -265,32 +165,32 @@ export function gunzip(input, out, inputIndex = 0, outputIndex = 0) {
265165
let lpos = pos
266166
for (;; lpos = pos) {
267167
// bits read, code
268-
const code = lmap[bits16(input, pos) & lms]
168+
const code = lengthMap[bits16(input, pos) & lms]
269169
const sym = code >> 4
270170
pos += code & 15
271171
if (pos > totalBits) throw new Error('unexpected EOF')
272172
if (!code) throw new Error('invalid length/literal')
273173
if (sym < 256) out[outputIndex++] = sym
274174
else if (sym === 256) {
275175
lpos = pos
276-
lmap = undefined
176+
lengthMap = undefined
277177
break
278178
} else {
279179
let add = sym - 254
280180
// no extra bits needed if less
281181
if (sym > 264) {
282182
const index = sym - 257
283183
const b = fixedLengthExtraBits[index]
284-
add = bits(input, pos, (1 << b) - 1) + fl[index]
184+
add = bits(input, pos, (1 << b) - 1) + fixedLength[index]
285185
pos += b
286186
}
287187
// dist
288-
if (!dmap) throw new Error('invalid distance map')
289-
const d = dmap[bits16(input, pos) & dms]
188+
if (!distMap) throw new Error('invalid distance map')
189+
const d = distMap[bits16(input, pos) & dms]
290190
const dsym = d >> 4
291191
if (!d) throw new Error('invalid distance')
292192
pos += d & 15
293-
let dt = fd[dsym]
193+
let dt = fixedDistance[dsym]
294194
if (dsym > 3) {
295195
const b = fixedDistanceExtraBits[dsym]
296196
dt += bits16(input, pos) & (1 << b) - 1
@@ -303,7 +203,7 @@ export function gunzip(input, out, inputIndex = 0, outputIndex = 0) {
303203
}
304204
}
305205
pos = lpos
306-
if (lmap) final = 1
206+
if (lengthMap) final = 1
307207
} while (!final)
308208

309209
if (outputIndex < out.length) {

0 commit comments

Comments
 (0)