Skip to content

Commit f8f611f

Browse files
author
Raghuveer Devulapalli
committed
format files
1 parent 9ad4432 commit f8f611f

11 files changed

+108
-71
lines changed

_clang-format

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ PenaltyExcessCharacter: 1000000
7474
PenaltyReturnTypeOnItsOwnLine: 60
7575
PointerAlignment: Right
7676
ReflowComments: false
77-
SortIncludes: true
77+
SortIncludes: false
7878
SortUsingDeclarations: true
7979
SpaceAfterCStyleCast: false
8080
SpaceAfterTemplateKeyword: true

src/avx2-32bit-common.h

Lines changed: 56 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -40,12 +40,16 @@ X86_SIMD_SORT_INLINE reg_t sort_ymm_32bit(reg_t ymm)
4040
ymm = cmp_merge<vtype>(
4141
ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
4242
ymm = cmp_merge<vtype>(
43-
ymm, vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_1), ymm), oxCC);
43+
ymm,
44+
vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_1), ymm),
45+
oxCC);
4446
ymm = cmp_merge<vtype>(
4547
ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
4648
ymm = cmp_merge<vtype>(ymm, vtype::permutexvar(rev_index, ymm), oxF0);
4749
ymm = cmp_merge<vtype>(
48-
ymm, vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_3), ymm), oxCC);
50+
ymm,
51+
vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_3), ymm),
52+
oxCC);
4953
ymm = cmp_merge<vtype>(
5054
ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
5155
return ymm;
@@ -200,18 +204,21 @@ struct avx2_vector<int32_t> {
200204
{
201205
return sort_ymm_32bit<avx2_vector<type_t>>(x);
202206
}
203-
static reg_t cast_from(__m256i v){
207+
static reg_t cast_from(__m256i v)
208+
{
204209
return v;
205210
}
206-
static __m256i cast_to(reg_t v){
211+
static __m256i cast_to(reg_t v)
212+
{
207213
return v;
208214
}
209215
static int double_compressstore(type_t *left_addr,
210216
type_t *right_addr,
211217
opmask_t k,
212218
reg_t reg)
213219
{
214-
return avx2_double_compressstore32<type_t>(left_addr, right_addr, k, reg);
220+
return avx2_double_compressstore32<type_t>(
221+
left_addr, right_addr, k, reg);
215222
}
216223
};
217224
template <>
@@ -346,18 +353,21 @@ struct avx2_vector<uint32_t> {
346353
{
347354
return sort_ymm_32bit<avx2_vector<type_t>>(x);
348355
}
349-
static reg_t cast_from(__m256i v){
356+
static reg_t cast_from(__m256i v)
357+
{
350358
return v;
351359
}
352-
static __m256i cast_to(reg_t v){
360+
static __m256i cast_to(reg_t v)
361+
{
353362
return v;
354363
}
355364
static int double_compressstore(type_t *left_addr,
356365
type_t *right_addr,
357366
opmask_t k,
358367
reg_t reg)
359368
{
360-
return avx2_double_compressstore32<type_t>(left_addr, right_addr, k, reg);
369+
return avx2_double_compressstore32<type_t>(
370+
left_addr, right_addr, k, reg);
361371
}
362372
};
363373
template <>
@@ -419,9 +429,10 @@ struct avx2_vector<float> {
419429
template <int type>
420430
static opmask_t fpclass(reg_t x)
421431
{
422-
if constexpr (type == (0x01 | 0x80)){
432+
if constexpr (type == (0x01 | 0x80)) {
423433
return _mm256_castps_si256(_mm256_cmp_ps(x, x, _CMP_UNORD_Q));
424-
}else{
434+
}
435+
else {
425436
static_assert(type == (0x01 | 0x80), "should not reach here");
426437
}
427438
}
@@ -514,75 +525,90 @@ struct avx2_vector<float> {
514525
{
515526
return sort_ymm_32bit<avx2_vector<type_t>>(x);
516527
}
517-
static reg_t cast_from(__m256i v){
528+
static reg_t cast_from(__m256i v)
529+
{
518530
return _mm256_castsi256_ps(v);
519531
}
520-
static __m256i cast_to(reg_t v){
532+
static __m256i cast_to(reg_t v)
533+
{
521534
return _mm256_castps_si256(v);
522535
}
523536
static int double_compressstore(type_t *left_addr,
524537
type_t *right_addr,
525538
opmask_t k,
526539
reg_t reg)
527540
{
528-
return avx2_double_compressstore32<type_t>(left_addr, right_addr, k, reg);
541+
return avx2_double_compressstore32<type_t>(
542+
left_addr, right_addr, k, reg);
529543
}
530544
};
531545

532-
struct avx2_32bit_swizzle_ops{
546+
struct avx2_32bit_swizzle_ops {
533547
template <typename vtype, int scale>
534-
X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(typename vtype::reg_t reg){
548+
X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(typename vtype::reg_t reg)
549+
{
535550
__m256i v = vtype::cast_to(reg);
536551

537-
if constexpr (scale == 2){
552+
if constexpr (scale == 2) {
538553
__m256 vf = _mm256_castsi256_ps(v);
539554
vf = _mm256_permute_ps(vf, 0b10110001);
540555
v = _mm256_castps_si256(vf);
541-
}else if constexpr (scale == 4){
556+
}
557+
else if constexpr (scale == 4) {
542558
__m256 vf = _mm256_castsi256_ps(v);
543559
vf = _mm256_permute_ps(vf, 0b01001110);
544560
v = _mm256_castps_si256(vf);
545-
}else if constexpr (scale == 8){
561+
}
562+
else if constexpr (scale == 8) {
546563
v = _mm256_permute2x128_si256(v, v, 0b00000001);
547-
}else{
564+
}
565+
else {
548566
static_assert(scale == -1, "should not be reached");
549567
}
550568

551569
return vtype::cast_from(v);
552570
}
553571

554572
template <typename vtype, int scale>
555-
X86_SIMD_SORT_INLINE typename vtype::reg_t reverse_n(typename vtype::reg_t reg){
573+
X86_SIMD_SORT_INLINE typename vtype::reg_t
574+
reverse_n(typename vtype::reg_t reg)
575+
{
556576
__m256i v = vtype::cast_to(reg);
557577

558-
if constexpr (scale == 2){
559-
return swap_n<vtype, 2>(reg);
560-
}else if constexpr (scale == 4){
578+
if constexpr (scale == 2) { return swap_n<vtype, 2>(reg); }
579+
else if constexpr (scale == 4) {
561580
constexpr uint64_t mask = 0b00011011;
562581
__m256 vf = _mm256_castsi256_ps(v);
563582
vf = _mm256_permute_ps(vf, mask);
564583
v = _mm256_castps_si256(vf);
565-
}else if constexpr (scale == 8){
584+
}
585+
else if constexpr (scale == 8) {
566586
return vtype::reverse(reg);
567-
}else{
587+
}
588+
else {
568589
static_assert(scale == -1, "should not be reached");
569590
}
570591

571592
return vtype::cast_from(v);
572593
}
573594

574595
template <typename vtype, int scale>
575-
X86_SIMD_SORT_INLINE typename vtype::reg_t merge_n(typename vtype::reg_t reg, typename vtype::reg_t other){
596+
X86_SIMD_SORT_INLINE typename vtype::reg_t
597+
merge_n(typename vtype::reg_t reg, typename vtype::reg_t other)
598+
{
576599
__m256i v1 = vtype::cast_to(reg);
577600
__m256i v2 = vtype::cast_to(other);
578601

579-
if constexpr (scale == 2){
602+
if constexpr (scale == 2) {
580603
v1 = _mm256_blend_epi32(v1, v2, 0b01010101);
581-
}else if constexpr (scale == 4){
604+
}
605+
else if constexpr (scale == 4) {
582606
v1 = _mm256_blend_epi32(v1, v2, 0b00110011);
583-
}else if constexpr (scale == 8){
607+
}
608+
else if constexpr (scale == 8) {
584609
v1 = _mm256_blend_epi32(v1, v2, 0b00001111);
585-
}else{
610+
}
611+
else {
586612
static_assert(scale == -1, "should not be reached");
587613
}
588614

src/avx2-emu-funcs.hpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,10 @@ T avx2_emu_reduce_max32(typename avx2_vector<T>::reg_t x)
7070
using vtype = avx2_vector<T>;
7171
using reg_t = typename vtype::reg_t;
7272

73-
reg_t inter1 = vtype::max(x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
74-
reg_t inter2 = vtype::max(inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
73+
reg_t inter1 = vtype::max(
74+
x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
75+
reg_t inter2 = vtype::max(
76+
inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
7577
T can1 = vtype::template extract<0>(inter2);
7678
T can2 = vtype::template extract<4>(inter2);
7779
return std::max(can1, can2);
@@ -83,8 +85,10 @@ T avx2_emu_reduce_min32(typename avx2_vector<T>::reg_t x)
8385
using vtype = avx2_vector<T>;
8486
using reg_t = typename vtype::reg_t;
8587

86-
reg_t inter1 = vtype::min(x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
87-
reg_t inter2 = vtype::min(inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
88+
reg_t inter1 = vtype::min(
89+
x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
90+
reg_t inter2 = vtype::min(
91+
inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
8892
T can1 = vtype::template extract<0>(inter2);
8993
T can2 = vtype::template extract<4>(inter2);
9094
return std::min(can1, can2);
@@ -112,9 +116,9 @@ void avx2_emu_mask_compressstoreu(void *base_addr,
112116

113117
template <typename T>
114118
int avx2_double_compressstore32(void *left_addr,
115-
void *right_addr,
116-
typename avx2_vector<T>::opmask_t k,
117-
typename avx2_vector<T>::reg_t reg)
119+
void *right_addr,
120+
typename avx2_vector<T>::opmask_t k,
121+
typename avx2_vector<T>::reg_t reg)
118122
{
119123
using vtype = avx2_vector<T>;
120124

@@ -137,7 +141,7 @@ int avx2_double_compressstore32(void *left_addr,
137141

138142
template <typename T>
139143
typename avx2_vector<T>::reg_t avx2_emu_max(typename avx2_vector<T>::reg_t x,
140-
typename avx2_vector<T>::reg_t y)
144+
typename avx2_vector<T>::reg_t y)
141145
{
142146
using vtype = avx2_vector<T>;
143147
typename vtype::opmask_t nlt = vtype::ge(x, y);
@@ -148,7 +152,7 @@ typename avx2_vector<T>::reg_t avx2_emu_max(typename avx2_vector<T>::reg_t x,
148152

149153
template <typename T>
150154
typename avx2_vector<T>::reg_t avx2_emu_min(typename avx2_vector<T>::reg_t x,
151-
typename avx2_vector<T>::reg_t y)
155+
typename avx2_vector<T>::reg_t y)
152156
{
153157
using vtype = avx2_vector<T>;
154158
typename vtype::opmask_t nlt = vtype::ge(x, y);

src/avx512-16bit-qsort.hpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,8 @@ struct zmm_vector<float16> {
190190
opmask_t k,
191191
reg_t reg)
192192
{
193-
return avx512_double_compressstore<zmm_vector<float16>>(left_addr, right_addr, k, reg);
193+
return avx512_double_compressstore<zmm_vector<float16>>(
194+
left_addr, right_addr, k, reg);
194195
}
195196
};
196197

@@ -325,7 +326,8 @@ struct zmm_vector<int16_t> {
325326
opmask_t k,
326327
reg_t reg)
327328
{
328-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
329+
return avx512_double_compressstore<zmm_vector<type_t>>(
330+
left_addr, right_addr, k, reg);
329331
}
330332
};
331333
template <>
@@ -457,7 +459,8 @@ struct zmm_vector<uint16_t> {
457459
opmask_t k,
458460
reg_t reg)
459461
{
460-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
462+
return avx512_double_compressstore<zmm_vector<type_t>>(
463+
left_addr, right_addr, k, reg);
461464
}
462465
};
463466

src/avx512-32bit-qsort.hpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,8 @@ struct zmm_vector<int32_t> {
163163
opmask_t k,
164164
reg_t reg)
165165
{
166-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
166+
return avx512_double_compressstore<zmm_vector<type_t>>(
167+
left_addr, right_addr, k, reg);
167168
}
168169
};
169170
template <>
@@ -301,7 +302,8 @@ struct zmm_vector<uint32_t> {
301302
opmask_t k,
302303
reg_t reg)
303304
{
304-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
305+
return avx512_double_compressstore<zmm_vector<type_t>>(
306+
left_addr, right_addr, k, reg);
305307
}
306308
};
307309
template <>
@@ -453,7 +455,8 @@ struct zmm_vector<float> {
453455
opmask_t k,
454456
reg_t reg)
455457
{
456-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
458+
return avx512_double_compressstore<zmm_vector<type_t>>(
459+
left_addr, right_addr, k, reg);
457460
}
458461
};
459462

src/avx512-64bit-argsort.hpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,6 @@ std_argsort(T *arr, arrsize_t *arg, arrsize_t left, arrsize_t right)
6565
});
6666
}
6767

68-
6968
/* Workaround for NumPy failed build on macOS x86_64: implicit instantiation of
7069
* undefined template 'zmm_vector<unsigned long>'*/
7170
#ifdef __APPLE__

src/avx512-64bit-common.h

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -677,7 +677,8 @@ struct zmm_vector<int64_t> {
677677
opmask_t k,
678678
reg_t reg)
679679
{
680-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
680+
return avx512_double_compressstore<zmm_vector<type_t>>(
681+
left_addr, right_addr, k, reg);
681682
}
682683
};
683684
template <>
@@ -846,7 +847,8 @@ struct zmm_vector<uint64_t> {
846847
opmask_t k,
847848
reg_t reg)
848849
{
849-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
850+
return avx512_double_compressstore<zmm_vector<type_t>>(
851+
left_addr, right_addr, k, reg);
850852
}
851853
};
852854
template <>
@@ -1021,7 +1023,8 @@ struct zmm_vector<double> {
10211023
opmask_t k,
10221024
reg_t reg)
10231025
{
1024-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
1026+
return avx512_double_compressstore<zmm_vector<type_t>>(
1027+
left_addr, right_addr, k, reg);
10251028
}
10261029
};
10271030

src/avx512fp16-16bit-qsort.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,8 @@ struct zmm_vector<_Float16> {
154154
opmask_t k,
155155
reg_t reg)
156156
{
157-
return avx512_double_compressstore<zmm_vector<type_t>>(left_addr, right_addr, k, reg);
157+
return avx512_double_compressstore<zmm_vector<type_t>>(
158+
left_addr, right_addr, k, reg);
158159
}
159160
};
160161

src/xss-common-includes.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,4 +75,3 @@ struct ymm_vector;
7575

7676
template <typename type>
7777
struct avx2_vector;
78-

0 commit comments

Comments
 (0)