Skip to content

Commit 6d111f5

Browse files
committed
Some minor changes and clang-format
1 parent b67dd66 commit 6d111f5

File tree

2 files changed

+46
-48
lines changed

2 files changed

+46
-48
lines changed

src/avx2-64bit-qsort.hpp

Lines changed: 38 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ struct avx2_vector<int64_t> {
5757
static const uint8_t numlanes = 4;
5858
static constexpr int network_sort_threshold = 64;
5959
static constexpr int partition_unroll_factor = 4;
60-
60+
6161
using swizzle_ops = avx2_64bit_swizzle_ops;
6262

6363
static type_t type_max()
@@ -85,14 +85,6 @@ struct avx2_vector<int64_t> {
8585
{
8686
return _mm256_xor_si256(x, y);
8787
}
88-
static opmask_t knot_opmask(opmask_t x)
89-
{
90-
return ~x;
91-
}
92-
static opmask_t le(reg_t x, reg_t y)
93-
{
94-
return ~_mm256_cmpgt_epi64(x, y);
95-
}
9688
static opmask_t ge(reg_t x, reg_t y)
9789
{
9890
opmask_t equal = eq(x, y);
@@ -113,8 +105,7 @@ struct avx2_vector<int64_t> {
113105
template <int scale>
114106
static reg_t i64gather(__m256i index, void const *base)
115107
{
116-
return _mm256_i64gather_epi64(
117-
(long long int const *)base, index, scale);
108+
return _mm256_i64gather_epi64((int64_t const *)base, index, scale);
118109
}
119110
static reg_t loadu(void const *mem)
120111
{
@@ -205,10 +196,12 @@ struct avx2_vector<int64_t> {
205196
{
206197
return sort_ymm_64bit<avx2_vector<type_t>>(x);
207198
}
208-
static reg_t cast_from(__m256i v){
199+
static reg_t cast_from(__m256i v)
200+
{
209201
return v;
210202
}
211-
static __m256i cast_to(reg_t v){
203+
static __m256i cast_to(reg_t v)
204+
{
212205
return v;
213206
}
214207
};
@@ -221,7 +214,7 @@ struct avx2_vector<uint64_t> {
221214
static const uint8_t numlanes = 4;
222215
static constexpr int network_sort_threshold = 64;
223216
static constexpr int partition_unroll_factor = 4;
224-
217+
225218
using swizzle_ops = avx2_64bit_swizzle_ops;
226219

227220
static type_t type_max()
@@ -257,10 +250,6 @@ struct avx2_vector<uint64_t> {
257250
return _mm256_i64gather_epi64(
258251
(long long int const *)base, index, scale);
259252
}
260-
static opmask_t knot_opmask(opmask_t x)
261-
{
262-
return ~x;
263-
}
264253
static opmask_t ge(reg_t x, reg_t y)
265254
{
266255
opmask_t equal = eq(x, y);
@@ -362,10 +351,12 @@ struct avx2_vector<uint64_t> {
362351
{
363352
return sort_ymm_64bit<avx2_vector<type_t>>(x);
364353
}
365-
static reg_t cast_from(__m256i v){
354+
static reg_t cast_from(__m256i v)
355+
{
366356
return v;
367357
}
368-
static __m256i cast_to(reg_t v){
358+
static __m256i cast_to(reg_t v)
359+
{
369360
return v;
370361
}
371362
};
@@ -378,7 +369,7 @@ struct avx2_vector<double> {
378369
static const uint8_t numlanes = 4;
379370
static constexpr int network_sort_threshold = 64;
380371
static constexpr int partition_unroll_factor = 4;
381-
372+
382373
using swizzle_ops = avx2_64bit_swizzle_ops;
383374

384375
static type_t type_max()
@@ -421,10 +412,6 @@ struct avx2_vector<double> {
421412
{
422413
return _mm256_maskload_pd((const double *)mem, mask);
423414
}
424-
static opmask_t knot_opmask(opmask_t x)
425-
{
426-
return ~x;
427-
}
428415
static opmask_t ge(reg_t x, reg_t y)
429416
{
430417
return _mm256_castpd_si256(_mm256_cmp_pd(x, y, _CMP_GE_OQ));
@@ -531,55 +518,64 @@ struct avx2_vector<double> {
531518
{
532519
return sort_ymm_64bit<avx2_vector<type_t>>(x);
533520
}
534-
static reg_t cast_from(__m256i v){
521+
static reg_t cast_from(__m256i v)
522+
{
535523
return _mm256_castsi256_pd(v);
536524
}
537-
static __m256i cast_to(reg_t v){
525+
static __m256i cast_to(reg_t v)
526+
{
538527
return _mm256_castpd_si256(v);
539528
}
540529
};
541530

542-
struct avx2_64bit_swizzle_ops{
531+
struct avx2_64bit_swizzle_ops {
543532
template <typename vtype, int scale>
544-
X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(typename vtype::reg_t reg){
533+
X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(typename vtype::reg_t reg)
534+
{
545535
__m256i v = vtype::cast_to(reg);
546536

547-
if constexpr (scale == 2){
537+
if constexpr (scale == 2) {
548538
v = _mm256_permute4x64_epi64(v, 0b10110001);
549-
}else if constexpr (scale == 4){
539+
}
540+
else if constexpr (scale == 4) {
550541
v = _mm256_permute4x64_epi64(v, 0b01001110);
551-
}else{
542+
}
543+
else {
552544
static_assert(scale == -1, "should not be reached");
553545
}
554546

555547
return vtype::cast_from(v);
556548
}
557549

558550
template <typename vtype, int scale>
559-
X86_SIMD_SORT_INLINE typename vtype::reg_t reverse_n(typename vtype::reg_t reg){
551+
X86_SIMD_SORT_INLINE typename vtype::reg_t
552+
reverse_n(typename vtype::reg_t reg)
553+
{
560554
__m256i v = vtype::cast_to(reg);
561555

562-
if constexpr (scale == 2){
563-
return swap_n<vtype, 2>(reg);
564-
}else if constexpr (scale == 4){
556+
if constexpr (scale == 2) { return swap_n<vtype, 2>(reg); }
557+
else if constexpr (scale == 4) {
565558
return vtype::reverse(reg);
566-
}else{
559+
}
560+
else {
567561
static_assert(scale == -1, "should not be reached");
568562
}
569563

570564
return vtype::cast_from(v);
571565
}
572566

573567
template <typename vtype, int scale>
574-
X86_SIMD_SORT_INLINE typename vtype::reg_t merge_n(typename vtype::reg_t reg, typename vtype::reg_t other){
568+
X86_SIMD_SORT_INLINE typename vtype::reg_t
569+
merge_n(typename vtype::reg_t reg, typename vtype::reg_t other)
570+
{
575571
__m256d v1 = _mm256_castsi256_pd(vtype::cast_to(reg));
576572
__m256d v2 = _mm256_castsi256_pd(vtype::cast_to(other));
577573

578-
if constexpr (scale == 2){
579-
v1 = _mm256_blend_pd(v1, v2, 0b0101);
580-
}else if constexpr (scale == 4){
574+
if constexpr (scale == 2) { v1 = _mm256_blend_pd(v1, v2, 0b0101); }
575+
else if constexpr (scale == 4) {
581576
v1 = _mm256_blend_pd(v1, v2, 0b0011);
582-
}else{
577+
}
578+
else {
583579
static_assert(scale == -1, "should not be reached");
584580
}
585581

src/avx2-emu-funcs.hpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -178,8 +178,8 @@ T avx2_emu_reduce_min64(typename avx2_vector<T>::reg_t x)
178178

179179
template <typename T>
180180
void avx2_emu_mask_compressstoreu32(void *base_addr,
181-
typename avx2_vector<T>::opmask_t k,
182-
typename avx2_vector<T>::reg_t reg)
181+
typename avx2_vector<T>::opmask_t k,
182+
typename avx2_vector<T>::reg_t reg)
183183
{
184184
using vtype = avx2_vector<T>;
185185

@@ -198,8 +198,8 @@ void avx2_emu_mask_compressstoreu32(void *base_addr,
198198

199199
template <typename T>
200200
void avx2_emu_mask_compressstoreu64(void *base_addr,
201-
typename avx2_vector<T>::opmask_t k,
202-
typename avx2_vector<T>::reg_t reg)
201+
typename avx2_vector<T>::opmask_t k,
202+
typename avx2_vector<T>::reg_t reg)
203203
{
204204
using vtype = avx2_vector<T>;
205205

@@ -211,7 +211,8 @@ void avx2_emu_mask_compressstoreu64(void *base_addr,
211211
const __m256i &left = _mm256_loadu_si256(
212212
(const __m256i *)avx2_compressstore_lut64_left[shortMask].data());
213213

214-
typename vtype::reg_t temp = vtype::cast_from(_mm256_permutevar8x32_epi32(vtype::cast_to(reg), perm));
214+
typename vtype::reg_t temp = vtype::cast_from(
215+
_mm256_permutevar8x32_epi32(vtype::cast_to(reg), perm));
215216

216217
vtype::mask_storeu(leftStore, left, temp);
217218
}
@@ -258,7 +259,8 @@ int32_t avx2_double_compressstore64(void *left_addr,
258259
const __m256i &left = _mm256_loadu_si256(
259260
(const __m256i *)avx2_compressstore_lut64_left[shortMask].data());
260261

261-
typename vtype::reg_t temp = vtype::cast_from(_mm256_permutevar8x32_epi32(vtype::cast_to(reg), perm));
262+
typename vtype::reg_t temp = vtype::cast_from(
263+
_mm256_permutevar8x32_epi32(vtype::cast_to(reg), perm));
262264

263265
vtype::mask_storeu(leftStore, left, temp);
264266
vtype::mask_storeu(rightStore, ~left, temp);

0 commit comments

Comments
 (0)