3
3
4
4
#include " xss-common-includes.h"
5
5
6
- #define NETWORK_32BIT_1 14 , 15 , 12 , 13 , 10 , 11 , 8 , 9 , 6 , 7 , 4 , 5 , 2 , 3 , 0 , 1
7
- #define NETWORK_32BIT_3 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7
8
- #define NETWORK_32BIT_5 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15
9
- #define NETWORK_32BIT_6 11 , 10 , 9 , 8 , 15 , 14 , 13 , 12 , 3 , 2 , 1 , 0 , 7 , 6 , 5 , 4
10
- #define NETWORK_32BIT_7 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 , 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8
11
-
12
6
template <typename keyType, typename valueType>
13
7
typename valueType::opmask_t resize_mask (typename keyType::opmask_t mask)
14
8
{
15
9
using inT = typename keyType::opmask_t ;
16
10
using outT = typename valueType::opmask_t ;
17
-
18
- if constexpr (keyType::vec_type == simd_type::AVX512) { return mask; }
19
- else if constexpr (keyType::vec_type == simd_type::AVX2) {
20
- if constexpr (sizeof (inT) == sizeof (outT)) { return mask; }
21
- else if constexpr (sizeof (inT) == 32 && sizeof (outT) == 16 ){
22
- // We need to convert a mask made of 64 bit integers to 32 bit integers
23
- // This does this by taking advantage of the fact that the only bit that matters
24
- // is the very topmost bit, which becomes the sign bit when cast to floating point
25
-
26
- // TODO try and figure out if there is a better way to do this
27
- return _mm_castps_si128 (_mm256_cvtpd_ps (_mm256_castsi256_pd (mask)));
28
- }
29
- else if constexpr (sizeof (inT) == 16 && sizeof (outT) == 32 ){
30
- return _mm256_cvtepi32_epi64 (mask);
31
- }else {
32
- static_assert (sizeof (inT) == -1 , " should not reach here" );
33
- }
11
+
12
+ if constexpr (sizeof (inT) == sizeof (outT)) { // std::is_same_v<inT, outT>) {
13
+ return mask;
14
+ }
15
+ /* convert __m256i to __m128i */
16
+ else if constexpr (sizeof (inT) == 32 && sizeof (outT) == 16 ) {
17
+ return _mm_castps_si128 (_mm256_cvtpd_ps (_mm256_castsi256_pd (mask)));
18
+ }
19
+ /* convert __m128i to __m256i */
20
+ else if constexpr (sizeof (inT) == 16 && sizeof (outT) == 32 ) {
21
+ return _mm256_cvtepi32_epi64 (mask);
34
22
}
35
23
else {
36
- static_assert (keyType::vec_type == simd_type::AVX512,
37
- " should not reach here" );
24
+ static_assert (always_false<keyType>, " Error in func resize_mask" );
38
25
}
39
26
}
40
27
@@ -78,17 +65,6 @@ X86_SIMD_SORT_INLINE reg_t1 cmp_merge(reg_t1 in1,
78
65
return tmp_keys; // 0 -> min, 1 -> max
79
66
}
80
67
81
- /*
82
- * Constants used in sorting 8 elements in a ZMM registers. Based on Bitonic
83
- * sorting network (see
84
- * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
85
- */
86
- // ZMM 7, 6, 5, 4, 3, 2, 1, 0
87
- #define NETWORK_64BIT_1 4 , 5 , 6 , 7 , 0 , 1 , 2 , 3
88
- #define NETWORK_64BIT_2 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7
89
- #define NETWORK_64BIT_3 5 , 4 , 7 , 6 , 1 , 0 , 3 , 2
90
- #define NETWORK_64BIT_4 3 , 2 , 1 , 0 , 7 , 6 , 5 , 4
91
-
92
68
template <typename vtype1,
93
69
typename vtype2,
94
70
typename reg_t = typename vtype1::reg_t ,
@@ -352,7 +328,7 @@ bitonic_merge_dispatch(typename keyType::reg_t &key,
352
328
key = bitonic_merge_ymm_64bit<keyType, valueType>(key, value);
353
329
}
354
330
else {
355
- static_assert (numlanes == - 1 , " No implementation" );
331
+ static_assert (always_false<keyType> , " bitonic_merge_dispatch: No implementation" );
356
332
UNUSED (key);
357
333
UNUSED (value);
358
334
}
@@ -373,7 +349,7 @@ X86_SIMD_SORT_INLINE void sort_vec_dispatch(typename keyType::reg_t &key,
373
349
key = sort_ymm_64bit<keyType, valueType>(key, value);
374
350
}
375
351
else {
376
- static_assert (numlanes == - 1 , " No implementation" );
352
+ static_assert (always_false<keyType> , " sort_vec_dispatch: No implementation" );
377
353
UNUSED (key);
378
354
UNUSED (value);
379
355
}
0 commit comments