Skip to content

Commit 7510f52

Browse files
committed
Use more recent version of lz4js
The published version of lz4js is outdated; this code also includes a fix from Benzinga/lz4js#13.
1 parent cb5abc9 commit 7510f52

File tree

1 file changed

+110
-103
lines changed

1 file changed

+110
-103
lines changed

packages/hub/src/vendor/lz4js/index.ts

Lines changed: 110 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ import * as util from "./util.js";
1616

1717
// Compression format parameters/constants.
1818
const minMatch = 4;
19-
const minLength = 13;
20-
const searchLimit = 5;
19+
const matchSearchLimit = 12;
20+
const minTrailingLitterals = 5;
2121
const skipTrigger = 6;
2222
const hashSize = 1 << 16;
2323

@@ -47,7 +47,7 @@ const bsUncompressed = 0x80000000;
4747
const bsDefault = 7;
4848
const bsShift = 4;
4949
const bsMask = 7;
50-
const bsMap = {
50+
const bsMap: Record<number, number> = {
5151
4: 0x10000,
5252
5: 0x40000,
5353
6: 0x100000,
@@ -73,14 +73,14 @@ function makeHashTable() {
7373
}
7474

7575
// Clear hashtable.
76-
function clearHashTable(table: typeof hashTable) {
76+
function clearHashTable(table: Uint32Array | number[]) {
7777
for (let i = 0; i < hashSize; i++) {
7878
table[i] = 0;
7979
}
8080
}
8181

8282
// Makes a byte buffer. On older browsers, may return a plain array.
83-
export function makeBuffer(size: number): Uint8Array {
83+
function makeBuffer(size: number) {
8484
return new Uint8Array(size);
8585
}
8686

@@ -92,12 +92,12 @@ function sliceArray(array: Uint8Array, start: number, end: number) {
9292
// --
9393

9494
// Calculates an upper bound for lz4 compression.
95-
export function compressBound(n: number): number {
95+
export function compressBound(n: number) {
9696
return (n + n / 255 + 16) | 0;
9797
}
9898

9999
// Calculates an upper bound for lz4 decompression, by reading the data.
100-
export function decompressBound(src: Uint8Array): number {
100+
export function decompressBound(src: Uint8Array) {
101101
let sIndex = 0;
102102

103103
// Read magic number
@@ -122,11 +122,11 @@ export function decompressBound(src: Uint8Array): number {
122122
// Read block size
123123
const bsIdx = (src[sIndex++] >> bsShift) & bsMask;
124124

125-
if (!(bsIdx in bsMap)) {
125+
if (bsMap[bsIdx] === undefined) {
126126
throw new Error("invalid block size " + bsIdx);
127127
}
128128

129-
const maxBlockSize = bsMap[bsIdx as keyof typeof bsMap];
129+
const maxBlockSize = bsMap[bsIdx];
130130

131131
// Get content size
132132
if (useContentSize) {
@@ -145,7 +145,7 @@ export function decompressBound(src: Uint8Array): number {
145145
if (blockSize & bsUncompressed) {
146146
blockSize &= ~bsUncompressed;
147147
maxSize += blockSize;
148-
} else {
148+
} else if (blockSize > 0) {
149149
maxSize += maxBlockSize;
150150
}
151151

@@ -162,17 +162,12 @@ export function decompressBound(src: Uint8Array): number {
162162
}
163163

164164
// Decompresses a block of Lz4.
165-
export function decompressBlock(
166-
src: Uint8Array,
167-
dst: Uint8Array,
168-
sIndex: number,
169-
sLength: number,
170-
dIndex: number
171-
): number {
172-
let mLength: number, mOffset: number, n: number, i: number;
165+
export function decompressBlock(src: Uint8Array, dst: Uint8Array, sIndex: number, sLength: number, dIndex: number) {
166+
let mLength, mOffset, sEnd, n, i;
167+
const hasCopyWithin = dst.copyWithin !== undefined && dst.fill !== undefined;
173168

174169
// Setup initial state.
175-
const sEnd = sIndex + sLength;
170+
sEnd = sIndex + sLength;
176171

177172
// Consume entire input block.
178173
while (sIndex < sEnd) {
@@ -219,9 +214,21 @@ export function decompressBlock(
219214

220215
mLength += minMatch;
221216

222-
// Copy match.
223-
for (i = dIndex - mOffset, n = i + mLength; i < n; ) {
224-
dst[dIndex++] = dst[i++] | 0;
217+
// Copy match
218+
// prefer to use typedarray.copyWithin for larger matches
219+
// NOTE: copyWithin doesn't work as required by LZ4 for overlapping sequences
220+
// e.g. mOffset=1, mLength=30 (repeat char 30 times)
221+
// we special case the repeat char w/ array.fill
222+
if (hasCopyWithin && mOffset === 1) {
223+
dst.fill(dst[dIndex - 1] | 0, dIndex, dIndex + mLength);
224+
dIndex += mLength;
225+
} else if (hasCopyWithin && mOffset > mLength && mLength > 31) {
226+
dst.copyWithin(dIndex, dIndex - mOffset, dIndex - mOffset + mLength);
227+
dIndex += mLength;
228+
} else {
229+
for (i = dIndex - mOffset, n = i + mLength; i < n; ) {
230+
dst[dIndex++] = dst[i++] | 0;
231+
}
225232
}
226233
}
227234

@@ -235,92 +242,90 @@ export function compressBlock(
235242
sIndex: number,
236243
sLength: number,
237244
hashTable: Uint32Array | number[]
238-
): number {
239-
let mIndex: number, mAnchor: number, mLength: number, mOffset: number, mStep: number;
240-
let literalCount: number, dIndex: number, n: number;
245+
) {
246+
let mIndex, mAnchor, mLength, mOffset, mStep;
247+
let literalCount, dIndex, sEnd, n;
241248

242249
// Setup initial state.
243250
dIndex = 0;
244-
const sEnd = sLength + sIndex;
251+
sEnd = sLength + sIndex;
245252
mAnchor = sIndex;
246253

247-
// Process only if block is large enough.
248-
if (sLength >= minLength) {
249-
let searchMatchCount = (1 << skipTrigger) + 3;
254+
let searchMatchCount = (1 << skipTrigger) + 3;
250255

251-
// Consume until last n literals (Lz4 spec limitation.)
252-
while (sIndex + minMatch < sEnd - searchLimit) {
253-
const seq = util.readU32(src, sIndex);
254-
let hash = util.hashU32(seq) >>> 0;
256+
// Search for matches with a limit of matchSearchLimit bytes
257+
// before the end of block (Lz4 spec limitation.)
258+
while (sIndex <= sEnd - matchSearchLimit) {
259+
const seq = util.readU32(src, sIndex);
260+
let hash = util.hashU32(seq) >>> 0;
255261

256-
// Crush hash to 16 bits.
257-
hash = (((hash >> 16) ^ hash) >>> 0) & 0xffff;
262+
// Crush hash to 16 bits.
263+
hash = (((hash >> 16) ^ hash) >>> 0) & 0xffff;
258264

259-
// Look for a match in the hashtable. NOTE: remove one; see below.
260-
mIndex = hashTable[hash] - 1;
265+
// Look for a match in the hashtable. NOTE: remove one; see below.
266+
mIndex = hashTable[hash] - 1;
261267

262-
// Put pos in hash table. NOTE: add one so that zero = invalid.
263-
hashTable[hash] = sIndex + 1;
268+
// Put pos in hash table. NOTE: add one so that zero = invalid.
269+
hashTable[hash] = sIndex + 1;
264270

265-
// Determine if there is a match (within range.)
266-
if (mIndex < 0 || (sIndex - mIndex) >>> 16 > 0 || util.readU32(src, mIndex) !== seq) {
267-
mStep = searchMatchCount++ >> skipTrigger;
268-
sIndex += mStep;
269-
continue;
270-
}
271+
// Determine if there is a match (within range.)
272+
if (mIndex < 0 || (sIndex - mIndex) >>> 16 > 0 || util.readU32(src, mIndex) !== seq) {
273+
mStep = searchMatchCount++ >> skipTrigger;
274+
sIndex += mStep;
275+
continue;
276+
}
271277

272-
searchMatchCount = (1 << skipTrigger) + 3;
278+
searchMatchCount = (1 << skipTrigger) + 3;
273279

274-
// Calculate literal count and offset.
275-
literalCount = sIndex - mAnchor;
276-
mOffset = sIndex - mIndex;
280+
// Calculate literal count and offset.
281+
literalCount = sIndex - mAnchor;
282+
mOffset = sIndex - mIndex;
277283

278-
// We've already matched one word, so get that out of the way.
279-
sIndex += minMatch;
280-
mIndex += minMatch;
284+
// We've already matched one word, so get that out of the way.
285+
sIndex += minMatch;
286+
mIndex += minMatch;
281287

282-
// Determine match length.
283-
// N.B.: mLength does not include minMatch, Lz4 adds it back
284-
// in decoding.
285-
mLength = sIndex;
286-
while (sIndex < sEnd - searchLimit && src[sIndex] === src[mIndex]) {
287-
sIndex++;
288-
mIndex++;
289-
}
290-
mLength = sIndex - mLength;
291-
292-
// Write token + literal count.
293-
const token = mLength < mlMask ? mLength : mlMask;
294-
if (literalCount >= runMask) {
295-
dst[dIndex++] = (runMask << mlBits) + token;
296-
for (n = literalCount - runMask; n >= 0xff; n -= 0xff) {
297-
dst[dIndex++] = 0xff;
298-
}
299-
dst[dIndex++] = n;
300-
} else {
301-
dst[dIndex++] = (literalCount << mlBits) + token;
288+
// Determine match length.
289+
// N.B.: mLength does not include minMatch, Lz4 adds it back
290+
// in decoding.
291+
mLength = sIndex;
292+
while (sIndex < sEnd - minTrailingLitterals && src[sIndex] === src[mIndex]) {
293+
sIndex++;
294+
mIndex++;
295+
}
296+
mLength = sIndex - mLength;
297+
298+
// Write token + literal count.
299+
const token = mLength < mlMask ? mLength : mlMask;
300+
if (literalCount >= runMask) {
301+
dst[dIndex++] = (runMask << mlBits) + token;
302+
for (n = literalCount - runMask; n >= 0xff; n -= 0xff) {
303+
dst[dIndex++] = 0xff;
302304
}
305+
dst[dIndex++] = n;
306+
} else {
307+
dst[dIndex++] = (literalCount << mlBits) + token;
308+
}
303309

304-
// Write literals.
305-
for (let i = 0; i < literalCount; i++) {
306-
dst[dIndex++] = src[mAnchor + i];
307-
}
310+
// Write literals.
311+
for (let i = 0; i < literalCount; i++) {
312+
dst[dIndex++] = src[mAnchor + i];
313+
}
308314

309-
// Write offset.
310-
dst[dIndex++] = mOffset;
311-
dst[dIndex++] = mOffset >> 8;
315+
// Write offset.
316+
dst[dIndex++] = mOffset;
317+
dst[dIndex++] = mOffset >> 8;
312318

313-
// Write match length.
314-
if (mLength >= mlMask) {
315-
for (n = mLength - mlMask; n >= 0xff; n -= 0xff) {
316-
dst[dIndex++] = 0xff;
317-
}
318-
dst[dIndex++] = n;
319+
// Write match length.
320+
if (mLength >= mlMask) {
321+
for (n = mLength - mlMask; n >= 0xff; n -= 0xff) {
322+
dst[dIndex++] = 0xff;
319323
}
320-
321-
// Move the anchor.
322-
mAnchor = sIndex;
324+
dst[dIndex++] = n;
323325
}
326+
327+
// Move the anchor.
328+
mAnchor = sIndex;
324329
}
325330

326331
// Nothing was encoded.
@@ -351,7 +356,8 @@ export function compressBlock(
351356
}
352357

353358
// Decompresses a frame of Lz4 data.
354-
export function decompressFrame(src: Uint8Array, dst: Uint8Array): number {
359+
export function decompressFrame(src: Uint8Array, dst: Uint8Array) {
360+
let useBlockSum, useContentSum, useContentSize, descriptor;
355361
let sIndex = 0;
356362
let dIndex = 0;
357363

@@ -363,22 +369,22 @@ export function decompressFrame(src: Uint8Array, dst: Uint8Array): number {
363369
sIndex += 4;
364370

365371
// Read descriptor
366-
const descriptor = src[sIndex++];
372+
descriptor = src[sIndex++];
367373

368374
// Check version
369375
if ((descriptor & fdVersionMask) !== fdVersion) {
370376
throw new Error("incompatible descriptor version");
371377
}
372378

373379
// Read flags
374-
const useBlockSum = (descriptor & fdBlockChksum) !== 0;
375-
const useContentSum = (descriptor & fdContentChksum) !== 0;
376-
const useContentSize = (descriptor & fdContentSize) !== 0;
380+
useBlockSum = (descriptor & fdBlockChksum) !== 0;
381+
useContentSum = (descriptor & fdContentChksum) !== 0;
382+
useContentSize = (descriptor & fdContentSize) !== 0;
377383

378384
// Read block size
379385
const bsIdx = (src[sIndex++] >> bsShift) & bsMask;
380386

381-
if (!(bsIdx in bsMap)) {
387+
if (bsMap[bsIdx] === undefined) {
382388
throw new Error("invalid block size");
383389
}
384390

@@ -391,7 +397,9 @@ export function decompressFrame(src: Uint8Array, dst: Uint8Array): number {
391397

392398
// Read blocks.
393399
while (true) {
394-
let compSize = util.readU32(src, sIndex);
400+
var compSize;
401+
402+
compSize = util.readU32(src, sIndex);
395403
sIndex += 4;
396404

397405
if (compSize === 0) {
@@ -428,7 +436,7 @@ export function decompressFrame(src: Uint8Array, dst: Uint8Array): number {
428436
}
429437

430438
// Compresses data to an Lz4 frame.
431-
export function compressFrame(src: Uint8Array, dst: Uint8Array): number {
439+
export function compressFrame(src: Uint8Array, dst: Uint8Array) {
432440
let dIndex = 0;
433441

434442
// Write magic number.
@@ -492,15 +500,14 @@ export function compressFrame(src: Uint8Array, dst: Uint8Array): number {
492500
// Decompresses a buffer containing an Lz4 frame. maxSize is optional; if not
493501
// provided, a maximum size will be determined by examining the data. The
494502
// buffer returned will always be perfectly-sized.
495-
export function decompress(src: Uint8Array, maxSize: number): Uint8Array {
496-
let dst;
503+
export function decompress(src: Uint8Array, maxSize: number) {
504+
let dst, size;
497505

498506
if (maxSize === undefined) {
499507
maxSize = decompressBound(src);
500508
}
501-
502509
dst = makeBuffer(maxSize);
503-
const size = decompressFrame(src, dst);
510+
size = decompressFrame(src, dst);
504511

505512
if (size !== maxSize) {
506513
dst = sliceArray(dst, 0, size);
@@ -512,15 +519,15 @@ export function decompress(src: Uint8Array, maxSize: number): Uint8Array {
512519
// Compresses a buffer to an Lz4 frame. maxSize is optional; if not provided,
513520
// a buffer will be created based on the theoretical worst output size for a
514521
// given input size. The buffer returned will always be perfectly-sized.
515-
export function compress(src: Uint8Array, maxSize: number): Uint8Array {
516-
let dst;
522+
export function compress(src: Uint8Array, maxSize: number) {
523+
let dst, size;
517524

518525
if (maxSize === undefined) {
519526
maxSize = compressBound(src.length);
520527
}
521528

522529
dst = makeBuffer(maxSize);
523-
const size = compressFrame(src, dst);
530+
size = compressFrame(src, dst);
524531

525532
if (size !== maxSize) {
526533
dst = sliceArray(dst, 0, size);

0 commit comments

Comments
 (0)