Skip to content

Commit 004119d

Browse files
authored
Merge pull request #67 from SpringMT/feature/update-zstd-1-5-6
feat: update Zstd 1.5.6
2 parents 1f9f931 + 677a499 commit 004119d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+4618
-2305
lines changed

ext/zstdruby/libzstd/common/allocations.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#define ZSTD_DEPS_NEED_MALLOC
1515
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
1616

17-
#include "mem.h" /* MEM_STATIC */
17+
#include "compiler.h" /* MEM_STATIC */
1818
#define ZSTD_STATIC_LINKING_ONLY
1919
#include "../zstd.h" /* ZSTD_customMem */
2020

ext/zstdruby/libzstd/common/bitstream.h

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -90,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
9090
/*-********************************************
9191
* bitStream decoding API (read backward)
9292
**********************************************/
93+
typedef size_t BitContainerType;
9394
typedef struct {
94-
size_t bitContainer;
95+
BitContainerType bitContainer;
9596
unsigned bitsConsumed;
9697
const char* ptr;
9798
const char* start;
9899
const char* limitPtr;
99100
} BIT_DStream_t;
100101

101-
typedef enum { BIT_DStream_unfinished = 0,
102-
BIT_DStream_endOfBuffer = 1,
103-
BIT_DStream_completed = 2,
104-
BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
105-
/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
102+
typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
103+
BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
104+
BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
105+
BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
106+
} BIT_DStream_status; /* result of BIT_reloadDStream() */
106107

107108
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
108109
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
@@ -112,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
112113

113114
/* Start by invoking BIT_initDStream().
114115
* A chunk of the bitStream is then stored into a local register.
115-
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
116+
* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
116117
* You can then retrieve bitFields stored into the local register, **in reverse order**.
117118
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
118119
* A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
@@ -162,7 +163,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
162163
return 0;
163164
}
164165

165-
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
166+
FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
166167
{
167168
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
168169
return _bzhi_u64(bitContainer, nbBits);
@@ -267,22 +268,22 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
267268
bitD->bitContainer = *(const BYTE*)(bitD->start);
268269
switch(srcSize)
269270
{
270-
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
271+
case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
271272
ZSTD_FALLTHROUGH;
272273

273-
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
274+
case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
274275
ZSTD_FALLTHROUGH;
275276

276-
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
277+
case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
277278
ZSTD_FALLTHROUGH;
278279

279-
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
280+
case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
280281
ZSTD_FALLTHROUGH;
281282

282-
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
283+
case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
283284
ZSTD_FALLTHROUGH;
284285

285-
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
286+
case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
286287
ZSTD_FALLTHROUGH;
287288

288289
default: break;
@@ -297,12 +298,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
297298
return srcSize;
298299
}
299300

300-
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
301+
FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
301302
{
302303
return bitContainer >> start;
303304
}
304305

305-
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
306+
FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
306307
{
307308
U32 const regMask = sizeof(bitContainer)*8 - 1;
308309
/* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -325,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
325326
* On 32-bits, maxNbBits==24.
326327
* On 64-bits, maxNbBits==56.
327328
* @return : value extracted */
328-
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
329+
FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
329330
{
330331
/* arbitrate between double-shift and shift+mask */
331332
#if 1
@@ -348,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
348349
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
349350
}
350351

351-
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
352+
FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
352353
{
353354
bitD->bitsConsumed += nbBits;
354355
}
@@ -357,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
357358
* Read (consume) next n bits from local register and update.
358359
* Pay attention to not read more than nbBits contained into local register.
359360
* @return : extracted value. */
360-
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
361+
FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
361362
{
362363
size_t const value = BIT_lookBits(bitD, nbBits);
363364
BIT_skipBits(bitD, nbBits);
@@ -374,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
374375
return value;
375376
}
376377

378+
/*! BIT_reloadDStream_internal() :
379+
* Simple variant of BIT_reloadDStream(), with two conditions:
380+
* 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
381+
* 2. look window is valid after being shifted down : bitD->ptr >= bitD->start
382+
*/
383+
MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
384+
{
385+
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
386+
bitD->ptr -= bitD->bitsConsumed >> 3;
387+
assert(bitD->ptr >= bitD->start);
388+
bitD->bitsConsumed &= 7;
389+
bitD->bitContainer = MEM_readLEST(bitD->ptr);
390+
return BIT_DStream_unfinished;
391+
}
392+
377393
/*! BIT_reloadDStreamFast() :
378394
* Similar to BIT_reloadDStream(), but with two differences:
379395
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@@ -384,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
384400
{
385401
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
386402
return BIT_DStream_overflow;
387-
assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
388-
bitD->ptr -= bitD->bitsConsumed >> 3;
389-
bitD->bitsConsumed &= 7;
390-
bitD->bitContainer = MEM_readLEST(bitD->ptr);
391-
return BIT_DStream_unfinished;
403+
return BIT_reloadDStream_internal(bitD);
392404
}
393405

394406
/*! BIT_reloadDStream() :
395407
* Refill `bitD` from buffer previously set in BIT_initDStream() .
396-
* This function is safe, it guarantees it will not read beyond src buffer.
408+
* This function is safe, it guarantees it will never read beyond src buffer.
397409
* @return : status of `BIT_DStream_t` internal register.
398410
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
399-
MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
411+
FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
400412
{
401-
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
413+
/* note : once in overflow mode, a bitstream remains in this mode until it's reset */
414+
if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
415+
static const BitContainerType zeroFilled = 0;
416+
bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
417+
/* overflow detected, erroneous scenario or end of stream: no update */
402418
return BIT_DStream_overflow;
419+
}
420+
421+
assert(bitD->ptr >= bitD->start);
403422

404423
if (bitD->ptr >= bitD->limitPtr) {
405-
return BIT_reloadDStreamFast(bitD);
424+
return BIT_reloadDStream_internal(bitD);
406425
}
407426
if (bitD->ptr == bitD->start) {
427+
/* reached end of bitStream => no update */
408428
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
409429
return BIT_DStream_completed;
410430
}
411-
/* start < ptr < limitPtr */
431+
/* start < ptr < limitPtr => cautious update */
412432
{ U32 nbBytes = bitD->bitsConsumed >> 3;
413433
BIT_DStream_status result = BIT_DStream_unfinished;
414434
if (bitD->ptr - nbBytes < bitD->start) {

ext/zstdruby/libzstd/common/compiler.h

Lines changed: 114 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#ifndef ZSTD_COMPILER_H
1212
#define ZSTD_COMPILER_H
1313

14+
#include <stddef.h>
15+
1416
#include "portability_macros.h"
1517

1618
/*-*******************************************************
@@ -51,12 +53,19 @@
5153
# define WIN_CDECL
5254
#endif
5355

56+
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
57+
#if defined(__GNUC__)
58+
# define UNUSED_ATTR __attribute__((unused))
59+
#else
60+
# define UNUSED_ATTR
61+
#endif
62+
5463
/**
5564
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
5665
* parameters. They must be inlined for the compiler to eliminate the constant
5766
* branches.
5867
*/
59-
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
68+
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
6069
/**
6170
* HINT_INLINE is used to help the compiler generate better code. It is *not*
6271
* used for "templates", so it can be tweaked based on the compilers
@@ -71,14 +80,28 @@
7180
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
7281
# define HINT_INLINE static INLINE_KEYWORD
7382
#else
74-
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
83+
# define HINT_INLINE FORCE_INLINE_TEMPLATE
7584
#endif
7685

77-
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
86+
/* "soft" inline :
87+
* The compiler is free to select if it's a good idea to inline or not.
88+
* The main objective is to silence compiler warnings
89+
* when a defined function is included but not used.
90+
*
91+
* Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
92+
* Updating the prefix is probably preferable, but requires a fairly large codemod,
93+
* since this name is used everywhere.
94+
*/
95+
#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
7896
#if defined(__GNUC__)
79-
# define UNUSED_ATTR __attribute__((unused))
97+
# define MEM_STATIC static __inline UNUSED_ATTR
98+
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
99+
# define MEM_STATIC static inline
100+
#elif defined(_MSC_VER)
101+
# define MEM_STATIC static __inline
80102
#else
81-
# define UNUSED_ATTR
103+
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
104+
#endif
82105
#endif
83106

84107
/* force no inlining */
@@ -109,35 +132,36 @@
109132
/* prefetch
110133
* can be disabled, by declaring NO_PREFETCH build macro */
111134
#if defined(NO_PREFETCH)
112-
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
113-
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
135+
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
136+
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
114137
#else
115-
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
138+
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
116139
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
117140
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
118141
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
119142
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
120143
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
121144
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
122145
# elif defined(__aarch64__)
123-
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
124-
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
146+
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
147+
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
125148
# else
126-
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
127-
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
149+
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
150+
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
128151
# endif
129152
#endif /* NO_PREFETCH */
130153

131154
#define CACHELINE_SIZE 64
132155

133-
#define PREFETCH_AREA(p, s) { \
134-
const char* const _ptr = (const char*)(p); \
135-
size_t const _size = (size_t)(s); \
136-
size_t _pos; \
137-
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
138-
PREFETCH_L2(_ptr + _pos); \
139-
} \
140-
}
156+
#define PREFETCH_AREA(p, s) \
157+
do { \
158+
const char* const _ptr = (const char*)(p); \
159+
size_t const _size = (size_t)(s); \
160+
size_t _pos; \
161+
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
162+
PREFETCH_L2(_ptr + _pos); \
163+
} \
164+
} while (0)
141165

142166
/* vectorization
143167
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
@@ -166,9 +190,9 @@
166190
#endif
167191

168192
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
169-
# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
193+
# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
170194
#else
171-
# define ZSTD_UNREACHABLE { assert(0); }
195+
# define ZSTD_UNREACHABLE do { assert(0); } while (0)
172196
#endif
173197

174198
/* disable warnings */
@@ -281,6 +305,74 @@
281305
* Sanitizer
282306
*****************************************************************/
283307

308+
/**
309+
* Zstd relies on pointer overflow in its decompressor.
310+
* We add this attribute to functions that rely on pointer overflow.
311+
*/
312+
#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
313+
# if __has_attribute(no_sanitize)
314+
# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
315+
/* gcc < 8 only has signed-integer-overflow which triggers on pointer overflow */
316+
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
317+
# else
318+
/* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
319+
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
320+
# endif
321+
# else
322+
# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
323+
# endif
324+
#endif
325+
326+
/**
327+
* Helper function to perform a wrapped pointer difference without triggering
328+
* UBSAN.
329+
*
330+
* @returns lhs - rhs with wrapping
331+
*/
332+
MEM_STATIC
333+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
334+
ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
335+
{
336+
return lhs - rhs;
337+
}
338+
339+
/**
340+
* Helper function to perform a wrapped pointer add without triggering UBSAN.
341+
*
342+
* @return ptr + add with wrapping
343+
*/
344+
MEM_STATIC
345+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
346+
unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
347+
{
348+
return ptr + add;
349+
}
350+
351+
/**
352+
* Helper function to perform a wrapped pointer subtraction without triggering
353+
* UBSAN.
354+
*
355+
* @return ptr - sub with wrapping
356+
*/
357+
MEM_STATIC
358+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
359+
unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
360+
{
361+
return ptr - sub;
362+
}
363+
364+
/**
365+
* Helper function to add to a pointer that works around C's undefined behavior
366+
* of adding 0 to NULL.
367+
*
368+
* @returns `ptr + add` except it defines `NULL + 0 == NULL`.
369+
*/
370+
MEM_STATIC
371+
unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
372+
{
373+
return add > 0 ? ptr + add : ptr;
374+
}
375+
284376
/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
285377
* abundance of caution, disable our custom poisoning on mingw. */
286378
#ifdef __MINGW32__

0 commit comments

Comments
 (0)