/*
 * LZ4 block-stream decompressor.
 *
 * based off https://github.com/emscripten-core/emscripten/blob/main/third_party/mini-lz4.js
 * which is in turn based off of node-lz4, https://github.com/pierrec/node-lz4
 * (MIT licensed; changes have the same license)
 *
 * Input layout: a sequence of blocks, each prefixed by a 4-byte little-endian
 * size. If bit 31 of the size is set, the block payload is stored verbatim
 * (uncompressed); otherwise it is a raw LZ4 block. A size of 0 terminates
 * the stream.
 *
 * @param {Uint8Array} source       the framed, compressed byte stream
 * @param {number} uncompressedSize total number of bytes the stream decodes to
 * @return {Uint8Array} view of the decoded bytes (length === uncompressedSize)
 */
function uncompress(source, uncompressedSize) {

	/**
	 * Decode a block. Assumptions: input contains all sequences of a
	 * chunk, output is large enough to receive the decoded data.
	 * If the output buffer is too small, an error will be thrown.
	 * If the returned value is negative, an error occurred at the returned offset.
	 *
	 * @param {ArrayBufferView} input input data
	 * @param {ArrayBufferView} output output data
	 * @param {number=} sIdx start offset into input
	 * @param {number=} eIdx end offset into input
	 * @return {number} number of decoded bytes
	 */
	function uncompressBlock(input, output, sIdx, eIdx) {
		sIdx = sIdx || 0
		eIdx = eIdx || (input.length - sIdx)
		// Process each sequence in the incoming data
		for (var i = sIdx, n = eIdx, j = 0; i < n;) {
			var token = input[i++]

			// Literals
			var literals_length = (token >> 4)
			if (literals_length > 0) {
				// length of literals: 15 in the token means "read extension bytes"
				var l = literals_length + 240
				while (l === 255) {
					l = input[i++]
					literals_length += l
				}

				// Copy the literals
				var end = i + literals_length
				while (i < end) output[j++] = input[i++]

				// End of buffer? (the last sequence of a block is literals only)
				if (i === n) return j
			}

			// Match copy
			// 2 bytes offset (little endian)
			var offset = input[i++] | (input[i++] << 8)

			// 0 is an invalid offset value; it also cannot reach before the output start
			if (offset === 0 || offset > j) return -(i - 2)

			// length of match copy: low nibble of the token, plus extension bytes
			var match_length = (token & 0xf)
			var l = match_length + 240
			while (l === 255) {
				l = input[i++]
				match_length += l
			}

			// Copy the match (byte-by-byte: source and destination may overlap)
			var pos = j - offset // position of the match copy in the current output
			var end = j + match_length + 4 // minmatch = 4
			while (j < end) output[j++] = output[pos++]
		}

		return j
	}

	var result = new ArrayBuffer(uncompressedSize);
	// FIX: bytes must be written through a typed-array view — indexing the
	// ArrayBuffer itself would create plain object properties, not store bytes.
	var resultView = new Uint8Array(result);
	var sourceIndex = 0;
	var destIndex = 0;
	var blockSize;
	// FIX: read the 4-byte size unsigned (>>> 0). A stored block sets bit 31,
	// and a signed 32-bit read would make the value negative, so the `> 0`
	// condition would end the loop before the stored-block branch ever ran.
	while ((blockSize = (source[sourceIndex] | (source[sourceIndex + 1] << 8) | (source[sourceIndex + 2] << 16) | (source[sourceIndex + 3] << 24)) >>> 0) > 0) {
		sourceIndex += 4;
		if (blockSize & 0x80000000) {
			// Stored (uncompressed) block: clear the flag bit and copy verbatim.
			// FIX: mask was 0x7FFFFFFFF (nine F's), which did not clear bit 31.
			blockSize &= 0x7FFFFFFF;
			for (var i = 0; i < blockSize; i++) {
				resultView[destIndex++] = source[sourceIndex++];
			}
		} else {
			// Compressed block: decode into the remaining slice of the output.
			destIndex += uncompressBlock(source, new Uint8Array(result, destIndex, uncompressedSize - destIndex), sourceIndex, sourceIndex + blockSize);
			sourceIndex += blockSize;
		}
	}
	return new Uint8Array(result, 0, uncompressedSize);
}