Skip to content

Commit 3998c1f

Browse files
committed
Improve: Branchless K-mask calculation
1 parent b4eb6a4 commit 3998c1f

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

include/stringcuzilla/similarity.hpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2432,7 +2432,7 @@ struct tile_scorer<sz_rune_t const *, sz_rune_t const *, sz_u8_t, uniform_substi
24322432

24332433
// ? Note that here we are still traversing both buffers in the same order,
24342434
// ? because one of the strings has been reversed beforehand.
2435-
load_mask = _sz_u16_clamp_mask_until(n - i);
2435+
load_mask = _sz_u16_mask_until(n - i);
24362436
first_vec.zmm = _mm512_maskz_loadu_epi32(load_mask, first_reversed_slice + i);
24372437
second_vec.zmm = _mm512_maskz_loadu_epi32(load_mask, second_slice + i);
24382438
pre_substitution_vec.xmm = _mm_maskz_loadu_epi8(load_mask, scores_pre_substitution + i);
@@ -2533,7 +2533,7 @@ struct tile_scorer<char const *, char const *, sz_u16_t, uniform_substitution_co
25332533

25342534
// ? Note that here we are still traversing both buffers in the same order,
25352535
// ? because one of the strings has been reversed beforehand.
2536-
load_mask = _sz_u32_clamp_mask_until(n - i);
2536+
load_mask = _sz_u32_mask_until(n - i);
25372537
first_vec.ymm = _mm256_maskz_loadu_epi8(load_mask, first_reversed_slice + i);
25382538
second_vec.ymm = _mm256_maskz_loadu_epi8(load_mask, second_slice + i);
25392539
pre_substitution_vec.zmm = _mm512_maskz_loadu_epi16(load_mask, scores_pre_substitution + i);
@@ -2641,7 +2641,7 @@ struct tile_scorer<sz_rune_t const *, sz_rune_t const *, sz_u16_t, uniform_subst
26412641

26422642
// ? Note that here we are still traversing both buffers in the same order,
26432643
// ? because one of the strings has been reversed beforehand.
2644-
load_mask = _sz_u16_clamp_mask_until(n - i);
2644+
load_mask = _sz_u16_mask_until(n - i);
26452645
first_vec.zmm = _mm512_maskz_loadu_epi32(load_mask, first_reversed_slice + i);
26462646
second_vec.zmm = _mm512_maskz_loadu_epi32(load_mask, second_slice + i);
26472647
pre_substitution_vec.ymm = _mm256_maskz_loadu_epi16(load_mask, scores_pre_substitution + i);
@@ -3060,7 +3060,7 @@ struct tile_scorer<constant_iterator<char>, char const *, sz_i16_t, error_costs_
30603060
gap_cost_vec.zmm = _mm512_set1_epi16(gap);
30613061

30623062
// Load the data with a mask:
3063-
load_mask = _sz_u32_clamp_mask_until(n - i);
3063+
load_mask = _sz_u32_mask_until(n - i);
30643064
second_vec.ymms[0] = _mm256_maskz_loadu_epi8(load_mask, second_slice + i);
30653065
pre_substitution_vec.zmm = _mm512_maskz_loadu_epi16(load_mask, scores_pre_substitution + i);
30663066
pre_gap_vec.zmm = _mm512_maskz_loadu_epi16(load_mask, scores_pre_insertion + i);

0 commit comments

Comments (0)