Skip to content

Commit 6da5e1e

Browse files
authored
Fix: sz_copy_skylake tail handling on large input (#222)
1 parent eabe605 commit 6da5e1e

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

include/stringzilla/memory.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -835,7 +835,7 @@ SZ_PUBLIC void sz_copy_skylake(sz_ptr_t target, sz_cptr_t source, sz_size_t leng
     __mmask64 tail_mask = _sz_u64_mask_until(tail_length);
     _mm512_mask_storeu_epi8(target, head_mask, _mm512_maskz_loadu_epi8(head_mask, source));
     _mm512_mask_storeu_epi8(target + head_length + body_length, tail_mask,
-                            _mm512_maskz_loadu_epi8(tail_mask, source));
+                            _mm512_maskz_loadu_epi8(tail_mask, source + head_length + body_length));
 
     // Now in the main loop, we can use non-temporal loads and stores,
     // performing the operation in both directions.

0 commit comments

Comments
 (0)