Skip to content

Commit 7a83e52

Browse files
committed
clang-format
1 parent 01bae64 commit 7a83e52

File tree

6 files changed

+112
-144
lines changed

6 files changed

+112
-144
lines changed

src/avx2-32bit-half.hpp

Lines changed: 33 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -30,24 +30,13 @@ template <typename vtype, typename reg_t = typename vtype::reg_t>
3030
X86_SIMD_SORT_INLINE reg_t sort_ymm_32bit_half(reg_t ymm)
3131
{
3232
using swizzle = typename vtype::swizzle_ops;
33-
34-
const typename vtype::opmask_t oxAA
35-
= vtype::seti(-1, 0, -1, 0);
36-
const typename vtype::opmask_t oxCC
37-
= vtype::seti(-1, -1, 0, 0);
38-
39-
ymm = cmp_merge<vtype>(
40-
ymm,
41-
swizzle::template swap_n<vtype, 2>(ymm),
42-
oxAA);
43-
ymm = cmp_merge<vtype>(
44-
ymm,
45-
vtype::reverse(ymm),
46-
oxCC);
47-
ymm = cmp_merge<vtype>(
48-
ymm,
49-
swizzle::template swap_n<vtype, 2>(ymm),
50-
oxAA);
33+
34+
const typename vtype::opmask_t oxAA = vtype::seti(-1, 0, -1, 0);
35+
const typename vtype::opmask_t oxCC = vtype::seti(-1, -1, 0, 0);
36+
37+
ymm = cmp_merge<vtype>(ymm, swizzle::template swap_n<vtype, 2>(ymm), oxAA);
38+
ymm = cmp_merge<vtype>(ymm, vtype::reverse(ymm), oxCC);
39+
ymm = cmp_merge<vtype>(ymm, swizzle::template swap_n<vtype, 2>(ymm), oxAA);
5140
return ymm;
5241
}
5342

@@ -61,7 +50,7 @@ struct avx2_half_vector<int32_t> {
6150
using opmask_t = __m128i;
6251
static const uint8_t numlanes = 4;
6352
static constexpr simd_type vec_type = simd_type::AVX2;
64-
53+
6554
using swizzle_ops = avx2_32bit_half_swizzle_ops;
6655

6756
static type_t type_max()
@@ -81,13 +70,11 @@ struct avx2_half_vector<int32_t> {
8170
auto mask = ((0x1ull << num_to_read) - 0x1ull);
8271
return convert_int_to_avx2_mask_half(mask);
8372
}
84-
static ymmi_t
85-
seti(int v1, int v2, int v3, int v4)
73+
static ymmi_t seti(int v1, int v2, int v3, int v4)
8674
{
8775
return _mm_set_epi32(v1, v2, v3, v4);
8876
}
89-
static reg_t
90-
set(int v1, int v2, int v3, int v4)
77+
static reg_t set(int v1, int v2, int v3, int v4)
9178
{
9279
return _mm_set_epi32(v1, v2, v3, v4);
9380
}
@@ -99,8 +86,8 @@ struct avx2_half_vector<int32_t> {
9986
{
10087
opmask_t equal = eq(x, y);
10188
opmask_t greater = _mm_cmpgt_epi32(x, y);
102-
return _mm_castps_si128(_mm_or_ps(_mm_castsi128_ps(equal),
103-
_mm_castsi128_ps(greater)));
89+
return _mm_castps_si128(
90+
_mm_or_ps(_mm_castsi128_ps(equal), _mm_castsi128_ps(greater)));
10491
}
10592
static opmask_t eq(reg_t x, reg_t y)
10693
{
@@ -110,14 +97,12 @@ struct avx2_half_vector<int32_t> {
11097
static reg_t
11198
mask_i64gather(reg_t src, opmask_t mask, __m256i index, void const *base)
11299
{
113-
return _mm256_mask_i64gather_epi32(src, (const int *) base, index, mask, scale);
100+
return _mm256_mask_i64gather_epi32(
101+
src, (const int *)base, index, mask, scale);
114102
}
115103
static reg_t i64gather(type_t *arr, arrsize_t *ind)
116104
{
117-
return set(arr[ind[3]],
118-
arr[ind[2]],
119-
arr[ind[1]],
120-
arr[ind[0]]);
105+
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
121106
}
122107
static reg_t loadu(void const *mem)
123108
{
@@ -143,8 +128,8 @@ struct avx2_half_vector<int32_t> {
143128
static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y)
144129
{
145130
return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(x),
146-
_mm_castsi128_ps(y),
147-
_mm_castsi128_ps(mask)));
131+
_mm_castsi128_ps(y),
132+
_mm_castsi128_ps(mask)));
148133
}
149134
static void mask_storeu(void *mem, opmask_t mask, reg_t x)
150135
{
@@ -217,7 +202,7 @@ struct avx2_half_vector<uint32_t> {
217202
using opmask_t = __m128i;
218203
static const uint8_t numlanes = 4;
219204
static constexpr simd_type vec_type = simd_type::AVX2;
220-
205+
221206
using swizzle_ops = avx2_32bit_half_swizzle_ops;
222207

223208
static type_t type_max()
@@ -237,28 +222,24 @@ struct avx2_half_vector<uint32_t> {
237222
auto mask = ((0x1ull << num_to_read) - 0x1ull);
238223
return convert_int_to_avx2_mask_half(mask);
239224
}
240-
static ymmi_t
241-
seti(int v1, int v2, int v3, int v4)
225+
static ymmi_t seti(int v1, int v2, int v3, int v4)
242226
{
243227
return _mm_set_epi32(v1, v2, v3, v4);
244228
}
245-
static reg_t
246-
set(int v1, int v2, int v3, int v4)
229+
static reg_t set(int v1, int v2, int v3, int v4)
247230
{
248231
return _mm_set_epi32(v1, v2, v3, v4);
249232
}
250233
template <int scale>
251234
static reg_t
252235
mask_i64gather(reg_t src, opmask_t mask, __m256i index, void const *base)
253236
{
254-
return _mm256_mask_i64gather_epi32(src, (const int *) base, index, mask, scale);
237+
return _mm256_mask_i64gather_epi32(
238+
src, (const int *)base, index, mask, scale);
255239
}
256240
static reg_t i64gather(type_t *arr, arrsize_t *ind)
257241
{
258-
return set(arr[ind[3]],
259-
arr[ind[2]],
260-
arr[ind[1]],
261-
arr[ind[0]]);
242+
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
262243
}
263244
static opmask_t ge(reg_t x, reg_t y)
264245
{
@@ -289,8 +270,8 @@ struct avx2_half_vector<uint32_t> {
289270
static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y)
290271
{
291272
return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(x),
292-
_mm_castsi128_ps(y),
293-
_mm_castsi128_ps(mask)));
273+
_mm_castsi128_ps(y),
274+
_mm_castsi128_ps(mask)));
294275
}
295276
static void mask_storeu(void *mem, opmask_t mask, reg_t x)
296277
{
@@ -363,7 +344,7 @@ struct avx2_half_vector<float> {
363344
using opmask_t = __m128i;
364345
static const uint8_t numlanes = 4;
365346
static constexpr simd_type vec_type = simd_type::AVX2;
366-
347+
367348
using swizzle_ops = avx2_32bit_half_swizzle_ops;
368349

369350
static type_t type_max()
@@ -379,13 +360,11 @@ struct avx2_half_vector<float> {
379360
return _mm_set1_ps(type_max());
380361
}
381362

382-
static ymmi_t
383-
seti(int v1, int v2, int v3, int v4)
363+
static ymmi_t seti(int v1, int v2, int v3, int v4)
384364
{
385365
return _mm_set_epi32(v1, v2, v3, v4);
386366
}
387-
static reg_t
388-
set(float v1, float v2, float v3, float v4)
367+
static reg_t set(float v1, float v2, float v3, float v4)
389368
{
390369
return _mm_set_ps(v1, v2, v3, v4);
391370
}
@@ -424,14 +403,12 @@ struct avx2_half_vector<float> {
424403
static reg_t
425404
mask_i64gather(reg_t src, opmask_t mask, __m256i index, void const *base)
426405
{
427-
return _mm256_mask_i64gather_ps(src, (const float*) base, index, _mm_castsi128_ps(mask), scale);
406+
return _mm256_mask_i64gather_ps(
407+
src, (const float *)base, index, _mm_castsi128_ps(mask), scale);
428408
}
429409
static reg_t i64gather(type_t *arr, arrsize_t *ind)
430410
{
431-
return set(arr[ind[3]],
432-
arr[ind[2]],
433-
arr[ind[1]],
434-
arr[ind[0]]);
411+
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
435412
}
436413
static reg_t loadu(void const *mem)
437414
{
@@ -490,8 +467,7 @@ struct avx2_half_vector<float> {
490467
template <uint8_t mask>
491468
static reg_t shuffle(reg_t ymm)
492469
{
493-
return _mm_castsi128_ps(
494-
_mm_shuffle_epi32(_mm_castps_si128(ymm), mask));
470+
return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(ymm), mask));
495471
}
496472
static void storeu(void *mem, reg_t x)
497473
{
@@ -566,9 +542,7 @@ struct avx2_32bit_half_swizzle_ops {
566542
__m128i v1 = vtype::cast_to(reg);
567543
__m128i v2 = vtype::cast_to(other);
568544

569-
if constexpr (scale == 2) {
570-
v1 = _mm_blend_epi32(v1, v2, 0b0101);
571-
}
545+
if constexpr (scale == 2) { v1 = _mm_blend_epi32(v1, v2, 0b0101); }
572546
else if constexpr (scale == 4) {
573547
v1 = _mm_blend_epi32(v1, v2, 0b0011);
574548
}

src/avx2-64bit-qsort.hpp

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,7 @@ struct avx2_vector<int64_t> {
7777
{
7878
return _mm256_set_epi64x(v1, v2, v3, v4);
7979
}
80-
static reg_t set(type_t v1,
81-
type_t v2,
82-
type_t v3,
83-
type_t v4)
80+
static reg_t set(type_t v1, type_t v2, type_t v3, type_t v4)
8481
{
8582
return _mm256_set_epi64x(v1, v2, v3, v4);
8683
}
@@ -106,14 +103,12 @@ struct avx2_vector<int64_t> {
106103
static reg_t
107104
mask_i64gather(reg_t src, opmask_t mask, __m256i index, void const *base)
108105
{
109-
return _mm256_mask_i64gather_epi64(src, (const long long int *) base, index, mask, scale);
106+
return _mm256_mask_i64gather_epi64(
107+
src, (const long long int *)base, index, mask, scale);
110108
}
111109
static reg_t i64gather(type_t *arr, arrsize_t *ind)
112110
{
113-
return set(arr[ind[3]],
114-
arr[ind[2]],
115-
arr[ind[1]],
116-
arr[ind[0]]);
111+
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
117112
}
118113
static reg_t loadu(void const *mem)
119114
{
@@ -246,25 +241,20 @@ struct avx2_vector<uint64_t> {
246241
{
247242
return _mm256_set_epi64x(v1, v2, v3, v4);
248243
}
249-
static reg_t set(type_t v1,
250-
type_t v2,
251-
type_t v3,
252-
type_t v4)
244+
static reg_t set(type_t v1, type_t v2, type_t v3, type_t v4)
253245
{
254246
return _mm256_set_epi64x(v1, v2, v3, v4);
255247
}
256248
template <int scale>
257249
static reg_t
258250
mask_i64gather(reg_t src, opmask_t mask, __m256i index, void const *base)
259251
{
260-
return _mm256_mask_i64gather_epi64(src, (const long long int *) base, index, mask, scale);
252+
return _mm256_mask_i64gather_epi64(
253+
src, (const long long int *)base, index, mask, scale);
261254
}
262255
static reg_t i64gather(type_t *arr, arrsize_t *ind)
263256
{
264-
return set(arr[ind[3]],
265-
arr[ind[2]],
266-
arr[ind[1]],
267-
arr[ind[0]]);
257+
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
268258
}
269259
static opmask_t gt(reg_t x, reg_t y)
270260
{
@@ -427,10 +417,7 @@ struct avx2_vector<double> {
427417
{
428418
return _mm256_set_epi64x(v1, v2, v3, v4);
429419
}
430-
static reg_t set(type_t v1,
431-
type_t v2,
432-
type_t v3,
433-
type_t v4)
420+
static reg_t set(type_t v1, type_t v2, type_t v3, type_t v4)
434421
{
435422
return _mm256_set_pd(v1, v2, v3, v4);
436423
}
@@ -450,16 +437,16 @@ struct avx2_vector<double> {
450437
static reg_t
451438
mask_i64gather(reg_t src, opmask_t mask, __m256i index, void const *base)
452439
{
453-
return _mm256_mask_i64gather_pd(
454-
src, (const type_t *) base, index, _mm256_castsi256_pd(mask), scale);
440+
return _mm256_mask_i64gather_pd(src,
441+
(const type_t *)base,
442+
index,
443+
_mm256_castsi256_pd(mask),
444+
scale);
455445
;
456446
}
457447
static reg_t i64gather(type_t *arr, arrsize_t *ind)
458448
{
459-
return set(arr[ind[3]],
460-
arr[ind[2]],
461-
arr[ind[1]],
462-
arr[ind[0]]);
449+
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
463450
}
464451
static reg_t loadu(void const *mem)
465452
{

src/avx2-emu-funcs.hpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,10 @@ constexpr auto avx2_compressstore_lut32_half_gen = [] {
107107
return lutPair;
108108
}();
109109

110-
constexpr auto avx2_compressstore_lut32_half_perm = avx2_compressstore_lut32_half_gen[0];
111-
constexpr auto avx2_compressstore_lut32_half_left = avx2_compressstore_lut32_half_gen[1];
110+
constexpr auto avx2_compressstore_lut32_half_perm
111+
= avx2_compressstore_lut32_half_gen[0];
112+
constexpr auto avx2_compressstore_lut32_half_left
113+
= avx2_compressstore_lut32_half_gen[1];
112114

113115
constexpr auto avx2_compressstore_lut64_gen = [] {
114116
std::array<std::array<int32_t, 8>, 16> permLut {};
@@ -281,19 +283,22 @@ void avx2_emu_mask_compressstoreu32(void *base_addr,
281283
}
282284

283285
template <typename T>
284-
void avx2_emu_mask_compressstoreu32_half(void *base_addr,
285-
typename avx2_half_vector<T>::opmask_t k,
286-
typename avx2_half_vector<T>::reg_t reg)
286+
void avx2_emu_mask_compressstoreu32_half(
287+
void *base_addr,
288+
typename avx2_half_vector<T>::opmask_t k,
289+
typename avx2_half_vector<T>::reg_t reg)
287290
{
288291
using vtype = avx2_half_vector<T>;
289292

290293
T *leftStore = (T *)base_addr;
291294

292295
int32_t shortMask = convert_avx2_mask_to_int_half(k);
293296
const __m128i &perm = _mm_loadu_si128(
294-
(const __m128i *)avx2_compressstore_lut32_half_perm[shortMask].data());
297+
(const __m128i *)avx2_compressstore_lut32_half_perm[shortMask]
298+
.data());
295299
const __m128i &left = _mm_loadu_si128(
296-
(const __m128i *)avx2_compressstore_lut32_half_left[shortMask].data());
300+
(const __m128i *)avx2_compressstore_lut32_half_left[shortMask]
301+
.data());
297302

298303
typename vtype::reg_t temp = vtype::permutevar(reg, perm);
299304

@@ -346,9 +351,9 @@ int avx2_double_compressstore32(void *left_addr,
346351

347352
template <typename T>
348353
int avx2_double_compressstore32_half(void *left_addr,
349-
void *right_addr,
350-
typename avx2_half_vector<T>::opmask_t k,
351-
typename avx2_half_vector<T>::reg_t reg)
354+
void *right_addr,
355+
typename avx2_half_vector<T>::opmask_t k,
356+
typename avx2_half_vector<T>::reg_t reg)
352357
{
353358
using vtype = avx2_half_vector<T>;
354359

@@ -357,7 +362,8 @@ int avx2_double_compressstore32_half(void *left_addr,
357362

358363
int32_t shortMask = convert_avx2_mask_to_int_half(k);
359364
const __m128i &perm = _mm_loadu_si128(
360-
(const __m128i *)avx2_compressstore_lut32_half_perm[shortMask].data());
365+
(const __m128i *)avx2_compressstore_lut32_half_perm[shortMask]
366+
.data());
361367

362368
typename vtype::reg_t temp = vtype::permutevar(reg, perm);
363369

src/xss-common-argsort.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,6 @@ avx2_argsort(T *arr, arrsize_t *arg, arrsize_t arrsize, bool hasnan = false)
614614
avx2_half_vector<arrsize_t>,
615615
avx2_vector<arrsize_t>>::type;
616616

617-
618617
if (arrsize > 1) {
619618
if constexpr (std::is_floating_point_v<T>) {
620619
if ((hasnan) && (array_has_nan<vectype>(arr, arrsize))) {

src/xss-common-includes.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,6 @@ struct avx2_vector;
8585
template <typename type>
8686
struct avx2_half_vector;
8787

88-
enum class simd_type:int{
89-
AVX2, AVX512
90-
};
88+
enum class simd_type : int { AVX2, AVX512 };
9189

9290
#endif // XSS_COMMON_INCLUDES

0 commit comments

Comments
 (0)