Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions benchmarks/bench-objsort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ struct Point3D {
static constexpr std::string_view name {val};
Point3D()
{
x = (T)rand() / RAND_MAX;
y = (T)rand() / RAND_MAX;
z = (T)rand() / RAND_MAX;
x = (T)rand() / (T)RAND_MAX;
y = (T)rand() / (T)RAND_MAX;
z = (T)rand() / (T)RAND_MAX;
}
T distance()
{
Expand Down
90 changes: 86 additions & 4 deletions src/xss-common-argsort.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,8 @@ X86_SIMD_SORT_INLINE void argsort_(type_t *arr,
arrsize_t *arg,
arrsize_t left,
arrsize_t right,
arrsize_t max_iters)
arrsize_t max_iters,
arrsize_t task_threshold)
{
/*
* Resort to std::sort if quicksort isn't making any progress
Expand All @@ -494,11 +495,57 @@ X86_SIMD_SORT_INLINE void argsort_(type_t *arr,
type_t biggest = vtype::type_min();
arrsize_t pivot_index = argpartition_unrolled<vtype, argtype, 4>(
arr, arg, left, right + 1, pivot, &smallest, &biggest);
#ifdef XSS_COMPILE_OPENMP
if (pivot != smallest) {
bool parallel_left = (pivot_index - left) > task_threshold;
if (parallel_left) {
#pragma omp task
argsort_<vtype, argtype>(arr,
arg,
left,
pivot_index - 1,
max_iters - 1,
task_threshold);
}
else {
argsort_<vtype, argtype>(arr,
arg,
left,
pivot_index - 1,
max_iters - 1,
task_threshold);
}
}
if (pivot != biggest) {
bool parallel_right = (right - pivot_index) > task_threshold;

if (parallel_right) {
#pragma omp task
argsort_<vtype, argtype>(arr,
arg,
pivot_index,
right,
max_iters - 1,
task_threshold);
}
else {
argsort_<vtype, argtype>(arr,
arg,
pivot_index,
right,
max_iters - 1,
task_threshold);
}
}
#else
UNUSED(task_threshold);
if (pivot != smallest)
argsort_<vtype, argtype>(
arr, arg, left, pivot_index - 1, max_iters - 1);
arr, arg, left, pivot_index - 1, max_iters - 1, 0);
if (pivot != biggest)
argsort_<vtype, argtype>(arr, arg, pivot_index, right, max_iters - 1);
argsort_<vtype, argtype>(
arr, arg, pivot_index, right, max_iters - 1, 0);
#endif
}

template <typename vtype, typename argtype, typename type_t>
Expand Down Expand Up @@ -570,8 +617,43 @@ X86_SIMD_SORT_INLINE void xss_argsort(T *arr,
}
}
UNUSED(hasnan);

#ifdef XSS_COMPILE_OPENMP

bool use_parallel = arrsize > 10000;

if (use_parallel) {
// This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
constexpr int thread_limit = 8;
int thread_count = std::min(thread_limit, omp_get_max_threads());
arrsize_t task_threshold
= std::max((arrsize_t)10000, arrsize / 100);

// We use omp parallel and then omp single to set up the threads that will run the omp task calls in argsort_
// The omp single prevents multiple threads from running the initial argsort_ simultaneously and causing problems
// Note that we do not use the if(...) clause built into OpenMP, because it causes a performance regression for small arrays
#pragma omp parallel num_threads(thread_count)
#pragma omp single
argsort_<vectype, argtype>(arr,
arg,
0,
arrsize - 1,
2 * (arrsize_t)log2(arrsize),
task_threshold);
#pragma omp taskwait
}
else {
argsort_<vectype, argtype>(arr,
arg,
0,
arrsize - 1,
2 * (arrsize_t)log2(arrsize),
std::numeric_limits<arrsize_t>::max());
}
#else
argsort_<vectype, argtype>(
arr, arg, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize));
arr, arg, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize), 0);
#endif

if (descending) { std::reverse(arg, arg + arrsize); }
}
Expand Down
2 changes: 1 addition & 1 deletion src/xss-common-keyvaluesort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,7 @@ X86_SIMD_SORT_INLINE void xss_qsort_kv(
index_last_elem,
maxiters,
task_threshold);
#pragma omp taskwait
}
else {
kvsort_<keytype, valtype>(keys,
Expand All @@ -636,7 +637,6 @@ X86_SIMD_SORT_INLINE void xss_qsort_kv(
maxiters,
std::numeric_limits<arrsize_t>::max());
}
#pragma omp taskwait
#else
kvsort_<keytype, valtype>(
keys, indexes, 0, index_last_elem, maxiters, 0);
Expand Down
2 changes: 1 addition & 1 deletion src/xss-common-qsort.h
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,7 @@ X86_SIMD_SORT_INLINE void xss_qsort(T *arr, arrsize_t arrsize, bool hasnan)
arrsize - 1,
2 * (arrsize_t)log2(arrsize),
task_threshold);
#pragma omp taskwait
}
else {
qsort_<vtype, comparator, T>(arr,
Expand All @@ -690,7 +691,6 @@ X86_SIMD_SORT_INLINE void xss_qsort(T *arr, arrsize_t arrsize, bool hasnan)
2 * (arrsize_t)log2(arrsize),
std::numeric_limits<arrsize_t>::max());
}
#pragma omp taskwait
#else
qsort_<vtype, comparator, T>(
arr, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize), 0);
Expand Down
4 changes: 2 additions & 2 deletions tests/test-qsort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ TYPED_TEST_P(simdsort, test_argsort_ascending)
{
for (auto type : this->arrtype) {
bool hasnan = is_nan_test(type);
for (auto size : this->arrsize) {
for (auto size : this->arrsize_long) {
std::vector<TypeParam> arr = get_array<TypeParam>(type, size);
std::vector<TypeParam> sortedarr = arr;

Expand All @@ -110,7 +110,7 @@ TYPED_TEST_P(simdsort, test_argsort_descending)
{
for (auto type : this->arrtype) {
bool hasnan = is_nan_test(type);
for (auto size : this->arrsize) {
for (auto size : this->arrsize_long) {
std::vector<TypeParam> arr = get_array<TypeParam>(type, size);
std::vector<TypeParam> sortedarr = arr;

Expand Down
Loading