Skip to content

Commit 14f504c

Browse files
author
Raghuveer Devulapalli
authored
Merge pull request #195 from sterrettm2/argsort-openmp
Add OpenMP support to argsort
2 parents 357c5e3 + df68479 commit 14f504c

File tree

5 files changed

+93
-11
lines changed

5 files changed

+93
-11
lines changed

benchmarks/bench-objsort.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ struct Point3D {
1313
static constexpr std::string_view name {val};
1414
Point3D()
1515
{
16-
x = (T)rand() / RAND_MAX;
17-
y = (T)rand() / RAND_MAX;
18-
z = (T)rand() / RAND_MAX;
16+
x = (T)rand() / (T)RAND_MAX;
17+
y = (T)rand() / (T)RAND_MAX;
18+
z = (T)rand() / (T)RAND_MAX;
1919
}
2020
T distance()
2121
{

src/xss-common-argsort.h

Lines changed: 86 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,8 @@ X86_SIMD_SORT_INLINE void argsort_(type_t *arr,
472472
arrsize_t *arg,
473473
arrsize_t left,
474474
arrsize_t right,
475-
arrsize_t max_iters)
475+
arrsize_t max_iters,
476+
arrsize_t task_threshold)
476477
{
477478
/*
478479
* Resort to std::sort if quicksort isnt making any progress
@@ -494,11 +495,57 @@ X86_SIMD_SORT_INLINE void argsort_(type_t *arr,
494495
type_t biggest = vtype::type_min();
495496
arrsize_t pivot_index = argpartition_unrolled<vtype, argtype, 4>(
496497
arr, arg, left, right + 1, pivot, &smallest, &biggest);
498+
#ifdef XSS_COMPILE_OPENMP
499+
if (pivot != smallest) {
500+
bool parallel_left = (pivot_index - left) > task_threshold;
501+
if (parallel_left) {
502+
#pragma omp task
503+
argsort_<vtype, argtype>(arr,
504+
arg,
505+
left,
506+
pivot_index - 1,
507+
max_iters - 1,
508+
task_threshold);
509+
}
510+
else {
511+
argsort_<vtype, argtype>(arr,
512+
arg,
513+
left,
514+
pivot_index - 1,
515+
max_iters - 1,
516+
task_threshold);
517+
}
518+
}
519+
if (pivot != biggest) {
520+
bool parallel_right = (right - pivot_index) > task_threshold;
521+
522+
if (parallel_right) {
523+
#pragma omp task
524+
argsort_<vtype, argtype>(arr,
525+
arg,
526+
pivot_index,
527+
right,
528+
max_iters - 1,
529+
task_threshold);
530+
}
531+
else {
532+
argsort_<vtype, argtype>(arr,
533+
arg,
534+
pivot_index,
535+
right,
536+
max_iters - 1,
537+
task_threshold);
538+
}
539+
}
540+
#else
541+
UNUSED(task_threshold);
497542
if (pivot != smallest)
498543
argsort_<vtype, argtype>(
499-
arr, arg, left, pivot_index - 1, max_iters - 1);
544+
arr, arg, left, pivot_index - 1, max_iters - 1, 0);
500545
if (pivot != biggest)
501-
argsort_<vtype, argtype>(arr, arg, pivot_index, right, max_iters - 1);
546+
argsort_<vtype, argtype>(
547+
arr, arg, pivot_index, right, max_iters - 1, 0);
548+
#endif
502549
}
503550

504551
template <typename vtype, typename argtype, typename type_t>
@@ -570,8 +617,43 @@ X86_SIMD_SORT_INLINE void xss_argsort(T *arr,
570617
}
571618
}
572619
UNUSED(hasnan);
620+
621+
#ifdef XSS_COMPILE_OPENMP
622+
623+
bool use_parallel = arrsize > 10000;
624+
625+
if (use_parallel) {
626+
// This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
627+
constexpr int thread_limit = 8;
628+
int thread_count = std::min(thread_limit, omp_get_max_threads());
629+
arrsize_t task_threshold
630+
= std::max((arrsize_t)10000, arrsize / 100);
631+
632+
// We use omp parallel and then omp single to setup the threads that will run the omp task calls in qsort_
633+
// The omp single prevents multiple threads from running the initial qsort_ simultaneously and causing problems
634+
// Note that we do not use the if(...) clause built into OpenMP, because it causes a performance regression for small arrays
635+
#pragma omp parallel num_threads(thread_count)
636+
#pragma omp single
637+
argsort_<vectype, argtype>(arr,
638+
arg,
639+
0,
640+
arrsize - 1,
641+
2 * (arrsize_t)log2(arrsize),
642+
task_threshold);
643+
#pragma omp taskwait
644+
}
645+
else {
646+
argsort_<vectype, argtype>(arr,
647+
arg,
648+
0,
649+
arrsize - 1,
650+
2 * (arrsize_t)log2(arrsize),
651+
std::numeric_limits<arrsize_t>::max());
652+
}
653+
#else
573654
argsort_<vectype, argtype>(
574-
arr, arg, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize));
655+
arr, arg, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize), 0);
656+
#endif
575657

576658
if (descending) { std::reverse(arg, arg + arrsize); }
577659
}

src/xss-common-keyvaluesort.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ X86_SIMD_SORT_INLINE void xss_qsort_kv(
627627
index_last_elem,
628628
maxiters,
629629
task_threshold);
630+
#pragma omp taskwait
630631
}
631632
else {
632633
kvsort_<keytype, valtype>(keys,
@@ -636,7 +637,6 @@ X86_SIMD_SORT_INLINE void xss_qsort_kv(
636637
maxiters,
637638
std::numeric_limits<arrsize_t>::max());
638639
}
639-
#pragma omp taskwait
640640
#else
641641
kvsort_<keytype, valtype>(
642642
keys, indexes, 0, index_last_elem, maxiters, 0);

src/xss-common-qsort.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ X86_SIMD_SORT_INLINE void xss_qsort(T *arr, arrsize_t arrsize, bool hasnan)
682682
arrsize - 1,
683683
2 * (arrsize_t)log2(arrsize),
684684
task_threshold);
685+
#pragma omp taskwait
685686
}
686687
else {
687688
qsort_<vtype, comparator, T>(arr,
@@ -690,7 +691,6 @@ X86_SIMD_SORT_INLINE void xss_qsort(T *arr, arrsize_t arrsize, bool hasnan)
690691
2 * (arrsize_t)log2(arrsize),
691692
std::numeric_limits<arrsize_t>::max());
692693
}
693-
#pragma omp taskwait
694694
#else
695695
qsort_<vtype, comparator, T>(
696696
arr, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize), 0);

tests/test-qsort.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ TYPED_TEST_P(simdsort, test_argsort_ascending)
8989
{
9090
for (auto type : this->arrtype) {
9191
bool hasnan = is_nan_test(type);
92-
for (auto size : this->arrsize) {
92+
for (auto size : this->arrsize_long) {
9393
std::vector<TypeParam> arr = get_array<TypeParam>(type, size);
9494
std::vector<TypeParam> sortedarr = arr;
9595

@@ -110,7 +110,7 @@ TYPED_TEST_P(simdsort, test_argsort_descending)
110110
{
111111
for (auto type : this->arrtype) {
112112
bool hasnan = is_nan_test(type);
113-
for (auto size : this->arrsize) {
113+
for (auto size : this->arrsize_long) {
114114
std::vector<TypeParam> arr = get_array<TypeParam>(type, size);
115115
std::vector<TypeParam> sortedarr = arr;
116116

0 commit comments

Comments
 (0)