Skip to content

Commit a2a5ca6

Browse files
committed
Add emscripten's mini-lz4.js.
1 parent d29709e commit a2a5ca6

File tree

1 file changed

+345
-0
lines changed

1 file changed

+345
-0
lines changed

scripts/ci/mini-lz4.js

Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
/*
MiniLZ4: Minimal LZ4 block decoding and encoding.

based off of node-lz4, https://github.com/pierrec/node-lz4

====
Copyright (c) 2012 Pierre Curto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
====

changes have the same license
*/
var MiniLZ4 = (function() {
32+
33+
var exports = {};
34+
35+
/**
36+
* Decode a block. Assumptions: input contains all sequences of a
37+
* chunk, output is large enough to receive the decoded data.
38+
* If the output buffer is too small, an error will be thrown.
39+
* If the returned value is negative, an error occured at the returned offset.
40+
*
41+
* @param {ArrayBufferView} input input data
42+
* @param {ArrayBufferView} output output data
43+
* @param {number=} sIdx
44+
* @param {number=} eIdx
45+
* @return {number} number of decoded bytes
46+
* @private
47+
*/
48+
exports.uncompress = function (input, output, sIdx, eIdx) {
49+
sIdx = sIdx || 0
50+
eIdx = eIdx || (input.length - sIdx)
51+
// Process each sequence in the incoming data
52+
for (var i = sIdx, n = eIdx, j = 0; i < n;) {
53+
var token = input[i++]
54+
55+
// Literals
56+
var literals_length = (token >> 4)
57+
if (literals_length > 0) {
58+
// length of literals
59+
var l = literals_length + 240
60+
while (l === 255) {
61+
l = input[i++]
62+
literals_length += l
63+
}
64+
65+
// Copy the literals
66+
var end = i + literals_length
67+
while (i < end) output[j++] = input[i++]
68+
69+
// End of buffer?
70+
if (i === n) return j
71+
}
72+
73+
// Match copy
74+
// 2 bytes offset (little endian)
75+
var offset = input[i++] | (input[i++] << 8)
76+
77+
// XXX 0 is an invalid offset value
78+
if (offset === 0) return j
79+
if (offset > j) return -(i-2)
80+
81+
// length of match copy
82+
var match_length = (token & 0xf)
83+
var l = match_length + 240
84+
while (l === 255) {
85+
l = input[i++]
86+
match_length += l
87+
}
88+
89+
// Copy the match
90+
var pos = j - offset // position of the match copy in the current output
91+
var end = j + match_length + 4 // minmatch = 4
92+
while (j < end) output[j++] = output[pos++]
93+
}
94+
95+
return j
96+
}
97+
98+
var
99+
maxInputSize = 0x7E000000
100+
, minMatch = 4
101+
// uint32() optimization
102+
, hashLog = 16
103+
, hashShift = (minMatch * 8) - hashLog
104+
, hashSize = 1 << hashLog
105+
106+
, copyLength = 8
107+
, lastLiterals = 5
108+
, mfLimit = copyLength + minMatch
109+
, skipStrength = 6
110+
111+
, mlBits = 4
112+
, mlMask = (1 << mlBits) - 1
113+
, runBits = 8 - mlBits
114+
, runMask = (1 << runBits) - 1
115+
116+
, hasher = /* XXX uint32( */ 2654435761 /* ) */
117+
118+
assert(hashShift === 16);
119+
var hashTable = new Int16Array(1<<16);
120+
var empty = new Int16Array(hashTable.length);
121+
122+
// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length
123+
exports.compressBound = function (isize) {
124+
return isize > maxInputSize
125+
? 0
126+
: (isize + (isize/255) + 16) | 0
127+
}
128+
129+
/** @param {number=} sIdx
130+
@param {number=} eIdx */
131+
exports.compress = function (src, dst, sIdx, eIdx) {
132+
hashTable.set(empty);
133+
return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length)
134+
}
135+
136+
function compressBlock (src, dst, pos, sIdx, eIdx) {
137+
// XXX var Hash = uint32() // Reusable unsigned 32 bits integer
138+
var dpos = sIdx
139+
var dlen = eIdx - sIdx
140+
var anchor = 0
141+
142+
if (src.length >= maxInputSize) throw new Error("input too large")
143+
144+
// Minimum of input bytes for compression (LZ4 specs)
145+
if (src.length > mfLimit) {
146+
var n = exports.compressBound(src.length)
147+
if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n)
148+
149+
var
150+
step = 1
151+
, findMatchAttempts = (1 << skipStrength) + 3
152+
// Keep last few bytes incompressible (LZ4 specs):
153+
// last 5 bytes must be literals
154+
, srcLength = src.length - mfLimit
155+
156+
while (pos + minMatch < srcLength) {
157+
// Find a match
158+
// min match of 4 bytes aka sequence
159+
var sequenceLowBits = src[pos+1]<<8 | src[pos]
160+
var sequenceHighBits = src[pos+3]<<8 | src[pos+2]
161+
// compute hash for the current sequence
162+
var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift;
163+
/* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits)
164+
.multiply(hasher)
165+
.shiftr(hashShift)
166+
.toNumber() */
167+
// get the position of the sequence matching the hash
168+
// NB. since 2 different sequences may have the same hash
169+
// it is double-checked below
170+
// do -1 to distinguish between initialized and uninitialized values
171+
var ref = hashTable[hash] - 1
172+
// save position of current sequence in hash table
173+
hashTable[hash] = pos + 1
174+
175+
// first reference or within 64k limit or current sequence !== hashed one: no match
176+
if ( ref < 0 ||
177+
((pos - ref) >>> 16) > 0 ||
178+
(
179+
((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) ||
180+
((src[ref+1]<<8 | src[ref]) != sequenceLowBits )
181+
)
182+
) {
183+
// increase step if nothing found within limit
184+
step = findMatchAttempts++ >> skipStrength
185+
pos += step
186+
continue
187+
}
188+
189+
findMatchAttempts = (1 << skipStrength) + 3
190+
191+
// got a match
192+
var literals_length = pos - anchor
193+
var offset = pos - ref
194+
195+
// minMatch already verified
196+
pos += minMatch
197+
ref += minMatch
198+
199+
// move to the end of the match (>=minMatch)
200+
var match_length = pos
201+
while (pos < srcLength && src[pos] == src[ref]) {
202+
pos++
203+
ref++
204+
}
205+
206+
// match length
207+
match_length = pos - match_length
208+
209+
// token
210+
var token = match_length < mlMask ? match_length : mlMask
211+
212+
// encode literals length
213+
if (literals_length >= runMask) {
214+
// add match length to the token
215+
dst[dpos++] = (runMask << mlBits) + token
216+
for (var len = literals_length - runMask; len > 254; len -= 255) {
217+
dst[dpos++] = 255
218+
}
219+
dst[dpos++] = len
220+
} else {
221+
// add match length to the token
222+
dst[dpos++] = (literals_length << mlBits) + token
223+
}
224+
225+
// write literals
226+
for (var i = 0; i < literals_length; i++) {
227+
dst[dpos++] = src[anchor+i]
228+
}
229+
230+
// encode offset
231+
dst[dpos++] = offset
232+
dst[dpos++] = (offset >> 8)
233+
234+
// encode match length
235+
if (match_length >= mlMask) {
236+
match_length -= mlMask
237+
while (match_length >= 255) {
238+
match_length -= 255
239+
dst[dpos++] = 255
240+
}
241+
242+
dst[dpos++] = match_length
243+
}
244+
245+
anchor = pos
246+
}
247+
}
248+
249+
// cannot compress input
250+
if (anchor == 0) return 0
251+
252+
// Write last literals
253+
// encode literals length
254+
literals_length = src.length - anchor
255+
if (literals_length >= runMask) {
256+
// add match length to the token
257+
dst[dpos++] = (runMask << mlBits)
258+
for (var ln = literals_length - runMask; ln > 254; ln -= 255) {
259+
dst[dpos++] = 255
260+
}
261+
dst[dpos++] = ln
262+
} else {
263+
// add match length to the token
264+
dst[dpos++] = (literals_length << mlBits)
265+
}
266+
267+
// write literals
268+
pos = anchor
269+
while (pos < src.length) {
270+
dst[dpos++] = src[pos++]
271+
}
272+
273+
return dpos
274+
}
275+
276+
exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea
277+
278+
exports.compressPackage = function(data, verify) {
279+
if (verify) {
280+
var temp = new Uint8Array(exports.CHUNK_SIZE);
281+
}
282+
// compress the data in chunks
283+
assert(data instanceof ArrayBuffer);
284+
data = new Uint8Array(data);
285+
console.log('compressing package of size ' + data.length);
286+
var compressedChunks = [];
287+
var successes = [];
288+
var offset = 0;
289+
var total = 0;
290+
while (offset < data.length) {
291+
var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE);
292+
//console.log('compress a chunk ' + [offset, total, data.length]);
293+
offset += exports.CHUNK_SIZE;
294+
var bound = exports.compressBound(chunk.length);
295+
var compressed = new Uint8Array(bound);
296+
var compressedSize = exports.compress(chunk, compressed);
297+
if (compressedSize > 0) {
298+
assert(compressedSize <= bound);
299+
compressed = compressed.subarray(0, compressedSize);
300+
compressedChunks.push(compressed);
301+
total += compressedSize;
302+
successes.push(1);
303+
if (verify) {
304+
var back = exports.uncompress(compressed, temp);
305+
assert(back === chunk.length, [back, chunk.length]);
306+
for (var i = 0; i < chunk.length; i++) {
307+
assert(chunk[i] === temp[i]);
308+
}
309+
}
310+
} else {
311+
assert(compressedSize === 0);
312+
// failure to compress :(
313+
compressedChunks.push(chunk);
314+
total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size
315+
successes.push(0);
316+
}
317+
}
318+
data = null; // XXX null out pack['data'] too?
319+
var compressedData = {
320+
'data': new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunk, in one fast array
321+
'cachedOffset': total,
322+
'cachedIndexes': [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing
323+
'cachedChunks': [null, null],
324+
'offsets': [], // chunk# => start in compressed data
325+
'sizes': [],
326+
'successes': successes, // 1 if chunk is compressed
327+
};
328+
offset = 0;
329+
for (var i = 0; i < compressedChunks.length; i++) {
330+
compressedData['data'].set(compressedChunks[i], offset);
331+
compressedData['offsets'][i] = offset;
332+
compressedData['sizes'][i] = compressedChunks[i].length
333+
offset += compressedChunks[i].length;
334+
}
335+
console.log('compressed package into ' + [compressedData['data'].length]);
336+
assert(offset === total);
337+
return compressedData;
338+
};
339+
340+
assert(exports.CHUNK_SIZE < (1 << 15)); // we use 16-bit ints as the type of the hash table, chunk size must be smaller
341+
342+
return exports;
343+
344+
})();
345+

0 commit comments

Comments
 (0)