Skip to content

Commit a345f50

Browse files
author
haruyama-makoto
committed
Update zstd to v1.4.1
1 parent 2657582 commit a345f50

31 files changed

+1049
-597
lines changed

ext/zstdruby/libzstd/Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
1717
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
1818
LIBVER := $(shell echo $(LIBVER_SCRIPT))
1919
VERSION?= $(LIBVER)
20+
CCVER := $(shell $(CC) --version)
2021

2122
CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
2223
ifeq ($(OS),Windows_NT) # MinGW assumed
@@ -45,6 +46,10 @@ ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
4546
ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
4647
ZSTD_FILES := $(ZSTDCOMMON_FILES)
4748

49+
ifeq ($(findstring GCC,$(CCVER)),GCC)
50+
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
51+
endif
52+
4853
ZSTD_LEGACY_SUPPORT ?= 5
4954
ZSTD_LIB_COMPRESSION ?= 1
5055
ZSTD_LIB_DECOMPRESSION ?= 1

ext/zstdruby/libzstd/common/compiler.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,13 @@
127127
} \
128128
}
129129

130+
/* vectorization */
131+
#if !defined(__clang__) && defined(__GNUC__)
132+
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
133+
#else
134+
# define DONT_VECTORIZE
135+
#endif
136+
130137
/* disable warnings */
131138
#ifdef _MSC_VER /* Visual Studio */
132139
# include <intrin.h> /* For Visual 2005 */

ext/zstdruby/libzstd/common/zstd_internal.h

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
#endif
3535
#include "xxhash.h" /* XXH_reset, update, digest */
3636

37-
3837
#if defined (__cplusplus)
3938
extern "C" {
4039
#endif
@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
193192
* Shared functions to include for inlining
194193
*********************************************/
195194
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
195+
196196
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
197+
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
198+
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
199+
200+
#define WILDCOPY_OVERLENGTH 8
201+
#define VECLEN 16
202+
203+
typedef enum {
204+
ZSTD_no_overlap,
205+
ZSTD_overlap_src_before_dst,
206+
/* ZSTD_overlap_dst_before_src, */
207+
} ZSTD_overlap_e;
197208

198209
/*! ZSTD_wildcopy() :
199210
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes beyond dst+length (e.g. when length==0) */
200-
#define WILDCOPY_OVERLENGTH 8
201-
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
211+
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
212+
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
202213
{
214+
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
203215
const BYTE* ip = (const BYTE*)src;
204216
BYTE* op = (BYTE*)dst;
205217
BYTE* const oend = op + length;
206-
do
207-
COPY8(op, ip)
208-
while (op < oend);
218+
219+
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
220+
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
221+
do
222+
COPY8(op, ip)
223+
while (op < oend);
224+
}
225+
else {
226+
if ((length & 8) == 0)
227+
COPY8(op, ip);
228+
do {
229+
COPY16(op, ip);
230+
}
231+
while (op < oend);
232+
}
233+
}
234+
235+
/*! ZSTD_wildcopy_16min() :
236+
* same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
237+
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
238+
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
239+
{
240+
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
241+
const BYTE* ip = (const BYTE*)src;
242+
BYTE* op = (BYTE*)dst;
243+
BYTE* const oend = op + length;
244+
245+
assert(length >= 8);
246+
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
247+
248+
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
249+
do
250+
COPY8(op, ip)
251+
while (op < oend);
252+
}
253+
else {
254+
if ((length & 8) == 0)
255+
COPY8(op, ip);
256+
do {
257+
COPY16(op, ip);
258+
}
259+
while (op < oend);
260+
}
209261
}
210262

211263
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */

0 commit comments

Comments
 (0)