Skip to content

Commit 2fb0de7

Browse files
rmilkowski authored and rmilkowskiGSA committed
compress: vectorize ZSTD_count() with SSE2
1 parent d7ee320 commit 2fb0de7

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

lib/compress/zstd_compress_internal.h

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -855,11 +855,30 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
855855
{
856856
const BYTE* const pStart = pIn;
857857
const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
858+
#if defined(ZSTD_ARCH_X86_SSE2)
859+
const BYTE* const pInLimit16 = pInLimit - (sizeof(__m128i)-1);
860+
#endif
858861

859862
if (pIn < pInLoopLimit) {
860863
{ size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
861864
if (diff) return ZSTD_NbCommonBytes(diff); }
862865
pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
866+
#if defined(ZSTD_ARCH_X86_SSE2)
867+
if ((size_t)(pInLimit - pIn) >= 32) {
868+
while (pIn < pInLimit16) {
869+
__m128i const matchVec = _mm_loadu_si128((const __m128i*)pMatch);
870+
__m128i const inVec = _mm_loadu_si128((const __m128i*)pIn);
871+
U32 const matchMask = (U32)_mm_movemask_epi8(_mm_cmpeq_epi8(matchVec, inVec));
872+
if (matchMask != 0xFFFF) {
873+
U32 const diffMask = ~matchMask & 0xFFFF;
874+
pIn += ZSTD_countTrailingZeros32(diffMask);
875+
return (size_t)(pIn - pStart);
876+
}
877+
pIn += sizeof(__m128i);
878+
pMatch += sizeof(__m128i);
879+
}
880+
}
881+
#endif
863882
while (pIn < pInLoopLimit) {
864883
size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
865884
if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }

0 commit comments

Comments
 (0)