Skip to content

Commit 7bbb92f

Browse files
author
Raghuveer Devulapalli
committed
Define and use xss_get_num_threads()
1 parent 724e92e commit 7bbb92f

File tree

5 files changed

+12
-12
lines changed

5 files changed

+12
-12
lines changed

src/avx512-16bit-qsort.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -552,9 +552,7 @@ avx512_qsort_fp16_helper(uint16_t *arr, arrsize_t arrsize)
552552
bool use_parallel = arrsize > 100000;
553553

554554
if (use_parallel) {
555-
// This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
556-
constexpr int thread_limit = 8;
557-
int thread_count = std::min(thread_limit, omp_get_max_threads());
555+
int thread_count = xss_get_num_threads();
558556
arrsize_t task_threshold = std::max((arrsize_t)100000, arrsize / 100);
559557

560558
// We use omp parallel and then omp single to setup the threads that will run the omp task calls in qsort_

src/xss-common-argsort.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -629,9 +629,7 @@ X86_SIMD_SORT_INLINE void xss_argsort(T *arr,
629629
bool use_parallel = arrsize > 10000;
630630

631631
if (use_parallel) {
632-
// This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
633-
constexpr int thread_limit = 8;
634-
int thread_count = std::min(thread_limit, omp_get_max_threads());
632+
int thread_count = xss_get_num_threads();
635633
arrsize_t task_threshold
636634
= std::max((arrsize_t)10000, arrsize / 100);
637635

src/xss-common-includes.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,14 @@
8585
#if defined(XSS_USE_OPENMP) && defined(_OPENMP)
8686
#define XSS_COMPILE_OPENMP
8787
#include <omp.h>
88+
89+
// Returns the number of OpenMP threads to request for a parallel region.
//
// The value is half of omp_get_num_procs(), used as an estimate of the
// physical core count, capped by omp_get_max_threads() and never less than 1.
X86_SIMD_SORT_INLINE int xss_get_num_threads()
{
    // Estimate the number of physical cores: halving the logical processor
    // count is accurate only when hyperthreading is enabled on all cores.
    const int num_physical_cores = omp_get_num_procs() / 2;
    const int max_threads = static_cast<int>(omp_get_max_threads());
    // The integer division above yields 0 when a single processor is
    // available; clamp to 1 because the OpenMP num_threads clause requires a
    // positive value.
    return std::max(1, std::min(num_physical_cores, max_threads));
}
8896
#endif
8997

9098
template <class... T>

src/xss-common-keyvaluesort.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -610,9 +610,7 @@ X86_SIMD_SORT_INLINE void xss_qsort_kv(
610610
bool use_parallel = arrsize > 10000;
611611

612612
if (use_parallel) {
613-
// This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
614-
constexpr int thread_limit = 8;
615-
int thread_count = std::min(thread_limit, omp_get_max_threads());
613+
int thread_count = xss_get_num_threads();
616614
arrsize_t task_threshold
617615
= std::max((arrsize_t)10000, arrsize / 100);
618616

src/xss-common-qsort.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -672,9 +672,7 @@ X86_SIMD_SORT_INLINE void xss_qsort(T *arr, arrsize_t arrsize, bool hasnan)
672672
bool use_parallel = arrsize > 100000;
673673

674674
if (use_parallel) {
675-
// This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
676-
constexpr int thread_limit = 8;
677-
int thread_count = std::min(thread_limit, omp_get_max_threads());
675+
int thread_count = xss_get_num_threads();
678676
arrsize_t task_threshold
679677
= std::max((arrsize_t)100000, arrsize / 100);
680678

0 commit comments

Comments
 (0)