Skip to content

Commit d62f656

Browse files
author
Raghuveer Devulapalli
authored
Merge pull request #168 from sterrettm2/kv-pivot
Adds smart pivot selection to key-value sorting
2 parents f99c392 + 8d378c9 commit d62f656

File tree

5 files changed

+55
-12
lines changed

5 files changed

+55
-12
lines changed

src/avx2-32bit-half.hpp

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,11 @@ struct avx2_half_vector<int32_t> {
6464
{
6565
return _mm_set1_epi32(type_max());
6666
} // TODO: this should broadcast bits as is?
67+
static opmask_t knot_opmask(opmask_t x)
68+
{
69+
auto allOnes = seti(-1, -1, -1, -1);
70+
return _mm_xor_si128(x, allOnes);
71+
}
6772
static opmask_t get_partial_loadmask(uint64_t num_to_read)
6873
{
6974
auto mask = ((0x1ull << num_to_read) - 0x1ull);
@@ -186,6 +191,10 @@ struct avx2_half_vector<int32_t> {
186191
{
187192
return v;
188193
}
194+
static bool all_false(opmask_t k)
195+
{
196+
return _mm_movemask_ps(_mm_castsi128_ps(k)) == 0;
197+
}
189198
static int double_compressstore(type_t *left_addr,
190199
type_t *right_addr,
191200
opmask_t k,
@@ -218,6 +227,11 @@ struct avx2_half_vector<uint32_t> {
218227
{
219228
return _mm_set1_epi32(type_max());
220229
}
230+
static opmask_t knot_opmask(opmask_t x)
231+
{
232+
auto allOnes = seti(-1, -1, -1, -1);
233+
return _mm_xor_si128(x, allOnes);
234+
}
221235
static opmask_t get_partial_loadmask(uint64_t num_to_read)
222236
{
223237
auto mask = ((0x1ull << num_to_read) - 0x1ull);
@@ -331,6 +345,10 @@ struct avx2_half_vector<uint32_t> {
331345
{
332346
return v;
333347
}
348+
static bool all_false(opmask_t k)
349+
{
350+
return _mm_movemask_ps(_mm_castsi128_ps(k)) == 0;
351+
}
334352
static int double_compressstore(type_t *left_addr,
335353
type_t *right_addr,
336354
opmask_t k,
@@ -363,7 +381,11 @@ struct avx2_half_vector<float> {
363381
{
364382
return _mm_set1_ps(type_max());
365383
}
366-
384+
static opmask_t knot_opmask(opmask_t x)
385+
{
386+
auto allOnes = seti(-1, -1, -1, -1);
387+
return _mm_xor_si128(x, allOnes);
388+
}
367389
static regi_t seti(int v1, int v2, int v3, int v4)
368390
{
369391
return _mm_set_epi32(v1, v2, v3, v4);
@@ -492,6 +514,10 @@ struct avx2_half_vector<float> {
492514
{
493515
return _mm_castps_si128(v);
494516
}
517+
static bool all_false(opmask_t k)
518+
{
519+
return _mm_movemask_ps(_mm_castsi128_ps(k)) == 0;
520+
}
495521
static int double_compressstore(type_t *left_addr,
496522
type_t *right_addr,
497523
opmask_t k,

src/avx512-64bit-common.h

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,10 @@ struct ymm_vector<float> {
210210
{
211211
return _mm256_castps_si256(v);
212212
}
213+
static bool all_false(opmask_t k)
214+
{
215+
return k == 0;
216+
}
213217
static reg_t reverse(reg_t ymm)
214218
{
215219
const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
@@ -394,6 +398,10 @@ struct ymm_vector<uint32_t> {
394398
{
395399
return v;
396400
}
401+
static bool all_false(opmask_t k)
402+
{
403+
return k == 0;
404+
}
397405
static reg_t reverse(reg_t ymm)
398406
{
399407
const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
@@ -578,6 +586,10 @@ struct ymm_vector<int32_t> {
578586
{
579587
return v;
580588
}
589+
static bool all_false(opmask_t k)
590+
{
591+
return k == 0;
592+
}
581593
static reg_t reverse(reg_t ymm)
582594
{
583595
const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);

src/xss-common-keyvaluesort.hpp

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ X86_SIMD_SORT_INLINE arrsize_t kvpartition(type_t1 *keys,
7272
for (int32_t i = (right - left) % vtype1::numlanes; i > 0; --i) {
7373
*smallest = std::min(*smallest, keys[left]);
7474
*biggest = std::max(*biggest, keys[left]);
75-
if (keys[left] > pivot) {
75+
if (keys[left] >= pivot) {
7676
right--;
7777
std::swap(keys[left], keys[right]);
7878
std::swap(indexes[left], indexes[right]);
@@ -204,12 +204,13 @@ X86_SIMD_SORT_INLINE arrsize_t kvpartition_unrolled(type_t1 *keys,
204204
return kvpartition<vtype1, vtype2>(
205205
keys, indexes, left, right, pivot, smallest, biggest);
206206
}
207+
207208
/* make array length divisible by num_unroll * vtype1::numlanes, shortening the array */
208209
for (int32_t i = ((right - left) % (num_unroll * vtype1::numlanes)); i > 0;
209210
--i) {
210211
*smallest = std::min(*smallest, keys[left]);
211212
*biggest = std::max(*biggest, keys[left]);
212-
if (keys[left] > pivot) {
213+
if (keys[left] >= pivot) {
213214
right--;
214215
std::swap(keys[left], keys[right]);
215216
std::swap(indexes[left], indexes[right]);
@@ -386,18 +387,27 @@ X86_SIMD_SORT_INLINE void kvsort_(type1_t *keys,
386387
* Base case: use bitonic networks to sort arrays <= 128
387388
*/
388389
if (right + 1 - left <= 128) {
389-
390390
kvsort_n<vtype1, vtype2, 128>(
391391
keys + left, indexes + left, (int32_t)(right + 1 - left));
392392
return;
393393
}
394394

395-
type1_t pivot = get_pivot_blocks<vtype1>(keys, left, right);
395+
// Ascending comparator for this vtype
396+
using comparator = Comparator<vtype1, false>;
397+
type1_t pivot;
398+
auto pivot_result
399+
= get_pivot_smart<vtype1, comparator, type1_t>(keys, left, right);
400+
pivot = pivot_result.pivot;
401+
402+
if (pivot_result.result == pivot_result_t::Sorted) { return; }
403+
396404
type1_t smallest = vtype1::type_max();
397405
type1_t biggest = vtype1::type_min();
398406
arrsize_t pivot_index = kvpartition_unrolled<vtype1, vtype2, 4>(
399407
keys, indexes, left, right + 1, pivot, &smallest, &biggest);
400408

409+
if (pivot_result.result == pivot_result_t::Only2Values) { return; }
410+
401411
#ifdef XSS_COMPILE_OPENMP
402412
if (pivot != smallest) {
403413
bool parallel_left = (pivot_index - left) > task_threshold;

src/xss-pivot-selection.hpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -148,12 +148,7 @@ get_pivot_smart(type_t *arr, const arrsize_t left, const arrsize_t right)
148148
return pivot_results<type_t>(
149149
comparator::choosePivotMedianIsLargest(median));
150150
}
151-
else {
152-
// Should be unreachable
153-
return pivot_results<type_t>(median);
154-
}
155151

156-
// Should be unreachable
157152
return pivot_results<type_t>(median);
158153
}
159154

utils/rand_array.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -136,8 +136,8 @@ static std::vector<T> get_array(std::string arrtype,
136136
else {
137137
val = std::numeric_limits<T>::max();
138138
}
139-
for (size_t ii = 1; ii <= arrsize; ++ii) {
140-
if (rand() % 0x1) { arr[ii] = val; }
139+
for (size_t ii = 0; ii < arrsize; ++ii) {
140+
if (rand() & 0x1) { arr[ii] = val; }
141141
}
142142
}
143143
else {

0 commit comments

Comments
 (0)