@@ -525,7 +525,7 @@ static void qsort_(type_t *arr,
525525 arrsize_t left,
526526 arrsize_t right,
527527 arrsize_t max_iters,
528- arrsize_t task_threshold )
528+ struct threadmanager &tm )
529529{
530530 /*
531531 * Resort to std::sort if quicksort isn't making any progress
@@ -562,40 +562,49 @@ static void qsort_(type_t *arr,
562562 type_t leftmostValue = comparator::leftmost (smallest, biggest);
563563 type_t rightmostValue = comparator::rightmost (smallest, biggest);
564564
565- #ifdef XSS_COMPILE_OPENMP
565+ std::thread t1, t2;
566+ bool parallel_left = ((pivot_index - left) > tm.task_threshold )
567+ && (tm.sharedCount < tm.max_thread_count );
566568 if (pivot != leftmostValue) {
567- bool parallel_left = (pivot_index - left) > task_threshold;
568569 if (parallel_left) {
569- #pragma omp task
570- qsort_<vtype, comparator>(
571- arr, left, pivot_index - 1 , max_iters - 1 , task_threshold);
570+ tm.incrementCount (1 );
571+ t1 = std::thread (qsort_<vtype, comparator, type_t >,
572+ arr,
573+ left,
574+ pivot_index - 1 ,
575+ max_iters - 1 ,
576+ std::ref (tm));
572577 }
573578 else {
574579 qsort_<vtype, comparator>(
575- arr, left, pivot_index - 1 , max_iters - 1 , task_threshold );
580+ arr, left, pivot_index - 1 , max_iters - 1 , tm );
576581 }
577582 }
583+ bool parallel_right = ((right - pivot_index) > tm.task_threshold )
584+ && (tm.sharedCount < tm.max_thread_count );
578585 if (pivot != rightmostValue) {
579- bool parallel_right = (right - pivot_index) > task_threshold;
580-
581586 if (parallel_right) {
582- #pragma omp task
583- qsort_<vtype, comparator>(
584- arr, pivot_index, right, max_iters - 1 , task_threshold);
587+ tm.incrementCount (1 );
588+ t2 = std::thread (qsort_<vtype, comparator, type_t >,
589+ arr,
590+ pivot_index,
591+ right,
592+ max_iters - 1 ,
593+ std::ref (tm));
585594 }
586595 else {
587596 qsort_<vtype, comparator>(
588- arr, pivot_index, right, max_iters - 1 , task_threshold );
597+ arr, pivot_index, right, max_iters - 1 , tm );
589598 }
590599 }
591- # else
592- UNUSED (task_threshold );
593-
594- if (pivot != leftmostValue)
595- qsort_<vtype, comparator>(arr, left, pivot_index - 1 , max_iters - 1 , 0 );
596- if (pivot != rightmostValue)
597- qsort_<vtype, comparator>(arr, pivot_index, right, max_iters - 1 , 0 );
598- # endif
600+ if (t1. joinable ()) {
601+ t1. join ( );
602+ tm. incrementCount (- 1 );
603+ }
604+ if (t2. joinable ()) {
605+ t2. join ();
606+ tm. incrementCount (- 1 );
607+ }
599608}
600609
601610template <typename vtype, typename comparator, typename type_t >
@@ -661,40 +670,9 @@ X86_SIMD_SORT_INLINE void xss_qsort(T *arr, arrsize_t arrsize, bool hasnan)
661670
662671 UNUSED (hasnan);
663672
664- #ifdef XSS_COMPILE_OPENMP
665-
666- bool use_parallel = arrsize > 100000 ;
667-
668- if (use_parallel) {
669- // This thread limit was determined experimentally; it may be better for it to be the number of physical cores on the system
670- constexpr int thread_limit = 8 ;
671- int thread_count = std::min (thread_limit, omp_get_max_threads ());
672- arrsize_t task_threshold
673- = std::max ((arrsize_t )100000 , arrsize / 100 );
674-
675- // We use omp parallel and then omp single to setup the threads that will run the omp task calls in qsort_
676- // The omp single prevents multiple threads from running the initial qsort_ simultaneously and causing problems
677- // Note that we do not use the if(...) clause built into OpenMP, because it causes a performance regression for small arrays
678- #pragma omp parallel num_threads(thread_count)
679- #pragma omp single
680- qsort_<vtype, comparator, T>(arr,
681- 0 ,
682- arrsize - 1 ,
683- 2 * (arrsize_t )log2 (arrsize),
684- task_threshold);
685- }
686- else {
687- qsort_<vtype, comparator, T>(arr,
688- 0 ,
689- arrsize - 1 ,
690- 2 * (arrsize_t )log2 (arrsize),
691- std::numeric_limits<arrsize_t >::max ());
692- }
693- #pragma omp taskwait
694- #else
673+ struct threadmanager tm;
695674 qsort_<vtype, comparator, T>(
696- arr, 0 , arrsize - 1 , 2 * (arrsize_t )log2 (arrsize), 0 );
697- #endif
675+ arr, 0 , arrsize - 1 , 2 * (arrsize_t )log2 (arrsize), tm);
698676
699677 replace_inf_with_nan (arr, arrsize, nan_count, descending);
700678 }
0 commit comments