Skip to content

Commit 3b41715

Browse files
committed
Standardized logic a bit more for code clarity
1 parent b80ff9b commit 3b41715

File tree

1 file changed

+48
-42
lines changed

1 file changed

+48
-42
lines changed

src/xss-reg-networks.hpp

Lines changed: 48 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,10 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_4lanes(reg_t reg)
3434
const typename vtype::opmask_t oxA = convert_int_to_mask<vtype>(0xA);
3535
const typename vtype::opmask_t oxC = convert_int_to_mask<vtype>(0xC);
3636

37-
reg = cmp_merge<vtype>(reg, swizzle::template swap_n<vtype, 2>(reg), oxA);
38-
reg = cmp_merge<vtype>(reg, vtype::reverse(reg), oxC);
37+
reg = cmp_merge<vtype>(
38+
reg, swizzle::template reverse_n<vtype, 2>(reg), oxA);
39+
reg = cmp_merge<vtype>(
40+
reg, swizzle::template reverse_n<vtype, 4>(reg), oxC);
3941
reg = cmp_merge<vtype>(reg, swizzle::template swap_n<vtype, 2>(reg), oxA);
4042
return reg;
4143
}
@@ -57,12 +59,11 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_8lanes(reg_t reg)
5759
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAA);
5860
reg = cmp_merge<vtype>(
5961
reg, swizzle::template reverse_n<vtype, 4>(reg), oxCC);
62+
reg = cmp_merge<vtype>(reg, swizzle::template swap_n<vtype, 2>(reg), oxAA);
6063
reg = cmp_merge<vtype>(
61-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAA);
62-
reg = cmp_merge<vtype>(reg, vtype::reverse(reg), oxF0);
64+
reg, swizzle::template reverse_n<vtype, 8>(reg), oxF0);
6365
reg = cmp_merge<vtype>(reg, swizzle::template swap_n<vtype, 4>(reg), oxCC);
64-
reg = cmp_merge<vtype>(
65-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAA);
66+
reg = cmp_merge<vtype>(reg, swizzle::template swap_n<vtype, 2>(reg), oxAA);
6667
return reg;
6768
}
6869

@@ -85,20 +86,21 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_16lanes(reg_t reg)
8586
reg = cmp_merge<vtype>(
8687
reg, swizzle::template reverse_n<vtype, 4>(reg), oxCCCC);
8788
reg = cmp_merge<vtype>(
88-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAAAA);
89+
reg, swizzle::template swap_n<vtype, 2>(reg), oxAAAA);
8990
reg = cmp_merge<vtype>(
9091
reg, swizzle::template reverse_n<vtype, 8>(reg), oxF0F0);
9192
reg = cmp_merge<vtype>(
9293
reg, swizzle::template swap_n<vtype, 4>(reg), oxCCCC);
9394
reg = cmp_merge<vtype>(
94-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAAAA);
95-
reg = cmp_merge<vtype>(reg, vtype::reverse(reg), oxFF00);
95+
reg, swizzle::template swap_n<vtype, 2>(reg), oxAAAA);
96+
reg = cmp_merge<vtype>(
97+
reg, swizzle::template reverse_n<vtype, 16>(reg), oxFF00);
9698
reg = cmp_merge<vtype>(
9799
reg, swizzle::template swap_n<vtype, 8>(reg), oxF0F0);
98100
reg = cmp_merge<vtype>(
99101
reg, swizzle::template swap_n<vtype, 4>(reg), oxCCCC);
100102
reg = cmp_merge<vtype>(
101-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAAAA);
103+
reg, swizzle::template swap_n<vtype, 2>(reg), oxAAAA);
102104
return reg;
103105
}
104106

@@ -129,14 +131,14 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_32lanes(reg_t reg)
129131
reg = cmp_merge<vtype>(
130132
reg, swizzle::template reverse_n<vtype, 4>(reg), oxCCCCCCCC);
131133
reg = cmp_merge<vtype>(
132-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAAAAAAAA);
134+
reg, swizzle::template swap_n<vtype, 2>(reg), oxAAAAAAAA);
133135
// Level 3
134136
reg = cmp_merge<vtype>(
135137
reg, swizzle::template reverse_n<vtype, 8>(reg), oxF0F0F0F0);
136138
reg = cmp_merge<vtype>(
137139
reg, swizzle::template swap_n<vtype, 4>(reg), oxCCCCCCCC);
138140
reg = cmp_merge<vtype>(
139-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAAAAAAAA);
141+
reg, swizzle::template swap_n<vtype, 2>(reg), oxAAAAAAAA);
140142
// Level 4
141143
reg = cmp_merge<vtype>(
142144
reg, swizzle::template reverse_n<vtype, 16>(reg), oxFF00FF00);
@@ -145,17 +147,18 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_32lanes(reg_t reg)
145147
reg = cmp_merge<vtype>(
146148
reg, swizzle::template swap_n<vtype, 4>(reg), oxCCCCCCCC);
147149
reg = cmp_merge<vtype>(
148-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAAAAAAAA);
150+
reg, swizzle::template swap_n<vtype, 2>(reg), oxAAAAAAAA);
149151
// Level 5
150-
reg = cmp_merge<vtype>(reg, vtype::reverse(reg), oxFFFF0000);
152+
reg = cmp_merge<vtype>(
153+
reg, swizzle::template reverse_n<vtype, 32>(reg), oxFFFF0000);
151154
reg = cmp_merge<vtype>(
152155
reg, swizzle::template swap_n<vtype, 16>(reg), oxFF00FF00);
153156
reg = cmp_merge<vtype>(
154157
reg, swizzle::template swap_n<vtype, 8>(reg), oxF0F0F0F0);
155158
reg = cmp_merge<vtype>(
156159
reg, swizzle::template swap_n<vtype, 4>(reg), oxCCCCCCCC);
157160
reg = cmp_merge<vtype>(
158-
reg, swizzle::template reverse_n<vtype, 2>(reg), oxAAAAAAAA);
161+
reg, swizzle::template swap_n<vtype, 2>(reg), oxAAAAAAAA);
159162
return reg;
160163
}
161164

@@ -175,15 +178,16 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_4lanes(reg_t key_reg, index_type &index_reg)
175178

176179
key_reg = cmp_merge<vtype1, vtype2>(
177180
key_reg,
178-
key_swizzle::template swap_n<vtype1, 2>(key_reg),
181+
key_swizzle::template reverse_n<vtype1, 2>(key_reg),
179182
index_reg,
180-
index_swizzle::template swap_n<vtype2, 2>(index_reg),
183+
index_swizzle::template reverse_n<vtype2, 2>(index_reg),
181184
oxA);
182-
key_reg = cmp_merge<vtype1, vtype2>(key_reg,
183-
vtype1::reverse(key_reg),
184-
index_reg,
185-
vtype2::reverse(index_reg),
186-
oxC);
185+
key_reg = cmp_merge<vtype1, vtype2>(
186+
key_reg,
187+
key_swizzle::template reverse_n<vtype1, 4>(key_reg),
188+
index_reg,
189+
index_swizzle::template reverse_n<vtype2, 4>(index_reg),
190+
oxC);
187191
key_reg = cmp_merge<vtype1, vtype2>(
188192
key_reg,
189193
key_swizzle::template swap_n<vtype1, 2>(key_reg),
@@ -208,9 +212,9 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_8lanes(reg_t key_reg, index_type &index_reg)
208212

209213
key_reg = cmp_merge<vtype1, vtype2>(
210214
key_reg,
211-
key_swizzle::template swap_n<vtype1, 2>(key_reg),
215+
key_swizzle::template reverse_n<vtype1, 2>(key_reg),
212216
index_reg,
213-
index_swizzle::template swap_n<vtype2, 2>(index_reg),
217+
index_swizzle::template reverse_n<vtype2, 2>(index_reg),
214218
oxAA);
215219
key_reg = cmp_merge<vtype1, vtype2>(
216220
key_reg,
@@ -224,11 +228,12 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_8lanes(reg_t key_reg, index_type &index_reg)
224228
index_reg,
225229
index_swizzle::template swap_n<vtype2, 2>(index_reg),
226230
oxAA);
227-
key_reg = cmp_merge<vtype1, vtype2>(key_reg,
228-
vtype1::reverse(key_reg),
229-
index_reg,
230-
vtype2::reverse(index_reg),
231-
oxF0);
231+
key_reg = cmp_merge<vtype1, vtype2>(
232+
key_reg,
233+
key_swizzle::template reverse_n<vtype1, 8>(key_reg),
234+
index_reg,
235+
index_swizzle::template reverse_n<vtype2, 8>(index_reg),
236+
oxF0);
232237
key_reg = cmp_merge<vtype1, vtype2>(
233238
key_reg,
234239
key_swizzle::template swap_n<vtype1, 4>(key_reg),
@@ -273,9 +278,9 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_16lanes(reg_t key_reg,
273278
oxCCCC);
274279
key_reg = cmp_merge<vtype1, vtype2>(
275280
key_reg,
276-
key_swizzle::template reverse_n<vtype1, 2>(key_reg),
281+
key_swizzle::template swap_n<vtype1, 2>(key_reg),
277282
index_reg,
278-
index_swizzle::template reverse_n<vtype2, 2>(index_reg),
283+
index_swizzle::template swap_n<vtype2, 2>(index_reg),
279284
oxAAAA);
280285
key_reg = cmp_merge<vtype1, vtype2>(
281286
key_reg,
@@ -291,15 +296,16 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_16lanes(reg_t key_reg,
291296
oxCCCC);
292297
key_reg = cmp_merge<vtype1, vtype2>(
293298
key_reg,
294-
key_swizzle::template reverse_n<vtype1, 2>(key_reg),
299+
key_swizzle::template swap_n<vtype1, 2>(key_reg),
295300
index_reg,
296-
index_swizzle::template reverse_n<vtype2, 2>(index_reg),
301+
index_swizzle::template swap_n<vtype2, 2>(index_reg),
297302
oxAAAA);
298-
key_reg = cmp_merge<vtype1, vtype2>(key_reg,
299-
vtype1::reverse(key_reg),
300-
index_reg,
301-
vtype2::reverse(index_reg),
302-
oxFF00);
303+
key_reg = cmp_merge<vtype1, vtype2>(
304+
key_reg,
305+
key_swizzle::template reverse_n<vtype1, 16>(key_reg),
306+
index_reg,
307+
index_swizzle::template reverse_n<vtype2, 16>(index_reg),
308+
oxFF00);
303309
key_reg = cmp_merge<vtype1, vtype2>(
304310
key_reg,
305311
key_swizzle::template swap_n<vtype1, 8>(key_reg),
@@ -314,9 +320,9 @@ X86_SIMD_SORT_INLINE reg_t sort_reg_16lanes(reg_t key_reg,
314320
oxCCCC);
315321
key_reg = cmp_merge<vtype1, vtype2>(
316322
key_reg,
317-
key_swizzle::template reverse_n<vtype1, 2>(key_reg),
323+
key_swizzle::template swap_n<vtype1, 2>(key_reg),
318324
index_reg,
319-
index_swizzle::template reverse_n<vtype2, 2>(index_reg),
325+
index_swizzle::template swap_n<vtype2, 2>(index_reg),
320326
oxAAAA);
321327
return key_reg;
322328
}
@@ -427,9 +433,9 @@ X86_SIMD_SORT_INLINE reg_t bitonic_merge_reg_16lanes(reg_t key_reg,
427433
oxCCCC);
428434
key_reg = cmp_merge<vtype1, vtype2>(
429435
key_reg,
430-
key_swizzle::template reverse_n<vtype1, 2>(key_reg),
436+
key_swizzle::template swap_n<vtype1, 2>(key_reg),
431437
index_reg,
432-
index_swizzle::template reverse_n<vtype2, 2>(index_reg),
438+
index_swizzle::template swap_n<vtype2, 2>(index_reg),
433439
oxAAAA);
434440
return key_reg;
435441
}

0 commit comments

Comments
 (0)