|
34 | 34 | #endif
|
35 | 35 | #include "xxhash.h" /* XXH_reset, update, digest */
|
36 | 36 |
|
37 |
| - |
38 | 37 | #if defined (__cplusplus)
|
39 | 38 | extern "C" {
|
40 | 39 | #endif
|
@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
193 | 192 | * Shared functions to include for inlining
|
194 | 193 | *********************************************/
|
/*-*******************************************
*  Shared copy helpers (meant to be inlined)
*********************************************/

/* Fixed-size 8-byte copy. A memcpy with a compile-time-constant size
 * typically lowers to a single unaligned load/store pair. */
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
/* Copy 8 bytes and advance both cursors. Brace block (not do/while(0)) on
 * purpose: callers use it directly as the body of a `do ... while` loop. */
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }

/* Fixed-size 16-byte copy, same idea as ZSTD_copy8(). */
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
/* Copy 16 bytes and advance both cursors; same brace-block caveat as COPY8. */
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }

/* Wildcopy routines may write up to this many bytes past dst+length. */
#define WILDCOPY_OVERLENGTH 8
/* Size in bytes of the wide (vector-like) copy step. */
#define VECLEN 16

/* Tells the wildcopy routines whether src and dst may overlap, so they can
 * pick a copy step that is safe for the given overlap distance. */
typedef enum {
    ZSTD_no_overlap,
    ZSTD_overlap_src_before_dst,
    /* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
197 | 208 |
|
198 | 209 | /*! ZSTD_wildcopy() :
|
199 | 210 | * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
|
200 |
| -#define WILDCOPY_OVERLENGTH 8 |
201 |
| -MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) |
| 211 | +MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE |
| 212 | +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) |
202 | 213 | {
|
| 214 | + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; |
203 | 215 | const BYTE* ip = (const BYTE*)src;
|
204 | 216 | BYTE* op = (BYTE*)dst;
|
205 | 217 | BYTE* const oend = op + length;
|
206 |
| - do |
207 |
| - COPY8(op, ip) |
208 |
| - while (op < oend); |
| 218 | + |
| 219 | + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); |
| 220 | + if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { |
| 221 | + do |
| 222 | + COPY8(op, ip) |
| 223 | + while (op < oend); |
| 224 | + } |
| 225 | + else { |
| 226 | + if ((length & 8) == 0) |
| 227 | + COPY8(op, ip); |
| 228 | + do { |
| 229 | + COPY16(op, ip); |
| 230 | + } |
| 231 | + while (op < oend); |
| 232 | + } |
| 233 | +} |
| 234 | + |
| 235 | +/*! ZSTD_wildcopy_16min() : |
| 236 | + * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */ |
| 237 | +MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE |
| 238 | +void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) |
| 239 | +{ |
| 240 | + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; |
| 241 | + const BYTE* ip = (const BYTE*)src; |
| 242 | + BYTE* op = (BYTE*)dst; |
| 243 | + BYTE* const oend = op + length; |
| 244 | + |
| 245 | + assert(length >= 8); |
| 246 | + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); |
| 247 | + |
| 248 | + if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { |
| 249 | + do |
| 250 | + COPY8(op, ip) |
| 251 | + while (op < oend); |
| 252 | + } |
| 253 | + else { |
| 254 | + if ((length & 8) == 0) |
| 255 | + COPY8(op, ip); |
| 256 | + do { |
| 257 | + COPY16(op, ip); |
| 258 | + } |
| 259 | + while (op < oend); |
| 260 | + } |
209 | 261 | }
|
210 | 262 |
|
211 | 263 | MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
|
|
0 commit comments