Skip to content

Commit 1cf135b

Browse files
author
Raghuveer Devulapalli
committed
Re-write resize_mask and move constants to common file
1 parent 1834edc commit 1cf135b

File tree

2 files changed

+33
-38
lines changed

2 files changed

+33
-38
lines changed

src/xss-common-includes.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,25 @@
7171
#define X86_SIMD_SORT_UNROLL_LOOP(num)
7272
#endif
7373

74+
template <class... T>
75+
constexpr bool always_false = false;
76+
77+
/*
78+
* Constants used in sorting 8 elements in a ZMM register. Based on Bitonic
79+
* sorting network (see
80+
* https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
81+
*/
82+
// ZMM 7, 6, 5, 4, 3, 2, 1, 0
83+
#define NETWORK_64BIT_1 4, 5, 6, 7, 0, 1, 2, 3
84+
#define NETWORK_64BIT_2 0, 1, 2, 3, 4, 5, 6, 7
85+
#define NETWORK_64BIT_3 5, 4, 7, 6, 1, 0, 3, 2
86+
#define NETWORK_64BIT_4 3, 2, 1, 0, 7, 6, 5, 4
87+
#define NETWORK_32BIT_1 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
88+
#define NETWORK_32BIT_3 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
89+
#define NETWORK_32BIT_5 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
90+
#define NETWORK_32BIT_6 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4
91+
#define NETWORK_32BIT_7 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
92+
7493
typedef size_t arrsize_t;
7594

7695
template <typename type>

src/xss-network-keyvaluesort.hpp

Lines changed: 14 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,25 @@
33

44
#include "xss-common-includes.h"
55

6-
#define NETWORK_32BIT_1 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
7-
#define NETWORK_32BIT_3 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7
8-
#define NETWORK_32BIT_5 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9-
#define NETWORK_32BIT_6 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4
10-
#define NETWORK_32BIT_7 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
11-
126
template <typename keyType, typename valueType>
137
typename valueType::opmask_t resize_mask(typename keyType::opmask_t mask)
148
{
159
using inT = typename keyType::opmask_t;
1610
using outT = typename valueType::opmask_t;
17-
18-
if constexpr (keyType::vec_type == simd_type::AVX512) { return mask; }
19-
else if constexpr (keyType::vec_type == simd_type::AVX2) {
20-
if constexpr (sizeof(inT) == sizeof(outT)) { return mask; }
21-
else if constexpr (sizeof(inT) == 32 && sizeof(outT) == 16){
22-
// We need to convert a mask made of 64 bit integers to 32 bit integers
23-
// This does this by taking advantage of the fact that the only bit that matters
24-
// is the very topmost bit, which becomes the sign bit when cast to floating point
25-
26-
// TODO try and figure out if there is a better way to do this
27-
return _mm_castps_si128(_mm256_cvtpd_ps(_mm256_castsi256_pd(mask)));
28-
}
29-
else if constexpr (sizeof(inT) == 16 && sizeof(outT) == 32){
30-
return _mm256_cvtepi32_epi64(mask);
31-
}else{
32-
static_assert(sizeof(inT) == -1, "should not reach here");
33-
}
11+
12+
if constexpr (sizeof(inT) == sizeof(outT)) { //std::is_same_v<inT, outT>) {
13+
return mask;
14+
}
15+
/* convert __m256i to __m128i */
16+
else if constexpr (sizeof(inT) == 32 && sizeof(outT) == 16) {
17+
return _mm_castps_si128(_mm256_cvtpd_ps(_mm256_castsi256_pd(mask)));
18+
}
19+
/* convert __m128i to __m256i */
20+
else if constexpr (sizeof(inT) == 16 && sizeof(outT) == 32) {
21+
return _mm256_cvtepi32_epi64(mask);
3422
}
3523
else {
36-
static_assert(keyType::vec_type == simd_type::AVX512,
37-
"should not reach here");
24+
static_assert(always_false<keyType>, "Error in func resize_mask");
3825
}
3926
}
4027

@@ -78,17 +65,6 @@ X86_SIMD_SORT_INLINE reg_t1 cmp_merge(reg_t1 in1,
7865
return tmp_keys; // 0 -> min, 1 -> max
7966
}
8067

81-
/*
82-
* Constants used in sorting 8 elements in a ZMM register. Based on Bitonic
83-
* sorting network (see
84-
* https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
85-
*/
86-
// ZMM 7, 6, 5, 4, 3, 2, 1, 0
87-
#define NETWORK_64BIT_1 4, 5, 6, 7, 0, 1, 2, 3
88-
#define NETWORK_64BIT_2 0, 1, 2, 3, 4, 5, 6, 7
89-
#define NETWORK_64BIT_3 5, 4, 7, 6, 1, 0, 3, 2
90-
#define NETWORK_64BIT_4 3, 2, 1, 0, 7, 6, 5, 4
91-
9268
template <typename vtype1,
9369
typename vtype2,
9470
typename reg_t = typename vtype1::reg_t,
@@ -352,7 +328,7 @@ bitonic_merge_dispatch(typename keyType::reg_t &key,
352328
key = bitonic_merge_ymm_64bit<keyType, valueType>(key, value);
353329
}
354330
else {
355-
static_assert(numlanes == -1, "No implementation");
331+
static_assert(always_false<keyType>, "bitonic_merge_dispatch: No implementation");
356332
UNUSED(key);
357333
UNUSED(value);
358334
}
@@ -373,7 +349,7 @@ X86_SIMD_SORT_INLINE void sort_vec_dispatch(typename keyType::reg_t &key,
373349
key = sort_ymm_64bit<keyType, valueType>(key, value);
374350
}
375351
else {
376-
static_assert(numlanes == -1, "No implementation");
352+
static_assert(always_false<keyType>, "sort_vec_dispatch: No implementation");
377353
UNUSED(key);
378354
UNUSED(value);
379355
}

0 commit comments

Comments
 (0)