|
8 | 8 |
|
9 | 9 | * ****************************************************************/
|
10 | 10 |
|
11 |
| -#ifndef AVX512_QSORT_COMMON |
12 |
| -#define AVX512_QSORT_COMMON |
| 11 | +#ifndef XSS_COMMON_QSORT |
| 12 | +#define XSS_COMMON_QSORT |
13 | 13 |
|
14 | 14 | /*
|
15 | 15 | * Quicksort using AVX-512. The ideas and code are based on these two research
|
@@ -549,94 +549,69 @@ X86_SIMD_SORT_INLINE void qselect_(type_t *arr,
|
549 | 549 | qselect_<vtype>(arr, pos, pivot_index, right, max_iters - 1);
|
550 | 550 | }
|
551 | 551 |
|
552 |
| -// Regular quicksort routines: |
553 |
| -template <typename T> |
554 |
| -X86_SIMD_SORT_INLINE void avx512_qsort(T *arr, arrsize_t arrsize) |
555 |
| -{ |
556 |
| - if (arrsize > 1) { |
557 |
| - /* std::is_floating_point_v<_Float16> == False, unless c++-23*/ |
558 |
| - if constexpr (std::is_floating_point_v<T>) { |
559 |
| - arrsize_t nan_count |
560 |
| - = replace_nan_with_inf<zmm_vector<T>>(arr, arrsize); |
561 |
| - qsort_<zmm_vector<T>, T>( |
562 |
| - arr, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize)); |
563 |
| - replace_inf_with_nan(arr, arrsize, nan_count); |
564 |
| - } |
565 |
| - else { |
566 |
| - qsort_<zmm_vector<T>, T>( |
567 |
| - arr, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize)); |
568 |
| - } |
569 |
| - } |
570 |
| -} |
571 |
| - |
572 |
| -template <typename T> |
573 |
| -void avx2_qsort(T *arr, arrsize_t arrsize) |
| 552 | +// Quicksort routines: |
| 553 | +template <typename vtype, typename T> |
| 554 | +X86_SIMD_SORT_INLINE void xss_qsort(T *arr, arrsize_t arrsize) |
574 | 555 | {
|
575 |
| - using vtype = avx2_vector<T>; |
576 | 556 | if (arrsize > 1) {
|
577 |
| - /* std::is_floating_point_v<_Float16> == False, unless c++-23*/ |
578 | 557 | if constexpr (std::is_floating_point_v<T>) {
|
579 | 558 | arrsize_t nan_count = replace_nan_with_inf<vtype>(arr, arrsize);
|
580 |
| - qsort_<vtype, T>(arr, 0, arrsize - 1, 2 * (int64_t)log2(arrsize)); |
| 559 | + qsort_<vtype, T>(arr, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize)); |
581 | 560 | replace_inf_with_nan(arr, arrsize, nan_count);
|
582 | 561 | }
|
583 | 562 | else {
|
584 |
| - qsort_<vtype, T>(arr, 0, arrsize - 1, 2 * (int64_t)log2(arrsize)); |
| 563 | + qsort_<vtype, T>(arr, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize)); |
585 | 564 | }
|
586 | 565 | }
|
587 | 566 | }
|
588 | 567 |
|
589 |
| -template <typename T> |
| 568 | +// Quick select methods |
| 569 | +template <typename vtype, typename T> |
590 | 570 | X86_SIMD_SORT_INLINE void
|
591 |
| -avx512_qselect(T *arr, arrsize_t k, arrsize_t arrsize, bool hasnan = false) |
| 571 | +xss_qselect(T *arr, arrsize_t k, arrsize_t arrsize, bool hasnan) |
592 | 572 | {
|
593 | 573 | arrsize_t indx_last_elem = arrsize - 1;
|
594 |
| - /* std::is_floating_point_v<_Float16> == False, unless c++-23*/ |
595 | 574 | if constexpr (std::is_floating_point_v<T>) {
|
596 | 575 | if (UNLIKELY(hasnan)) {
|
597 | 576 | indx_last_elem = move_nans_to_end_of_array(arr, arrsize);
|
598 | 577 | }
|
599 | 578 | }
|
600 | 579 | UNUSED(hasnan);
|
601 | 580 | if (indx_last_elem >= k) {
|
602 |
| - qselect_<zmm_vector<T>, T>( |
| 581 | + qselect_<vtype, T>( |
603 | 582 | arr, k, 0, indx_last_elem, 2 * (arrsize_t)log2(indx_last_elem));
|
604 | 583 | }
|
605 | 584 | }
|
606 | 585 |
|
607 |
| -template <typename T> |
608 |
| -void avx2_qselect(T *arr, arrsize_t k, arrsize_t arrsize, bool hasnan = false) |
| 586 | +// Partial sort methods: |
| 587 | +template <typename vtype, typename T> |
| 588 | +X86_SIMD_SORT_INLINE void |
| 589 | +xss_partial_qsort(T *arr, arrsize_t k, arrsize_t arrsize, bool hasnan) |
609 | 590 | {
|
610 |
| - arrsize_t indx_last_elem = arrsize - 1; |
611 |
| - /* std::is_floating_point_v<_Float16> == False, unless c++-23*/ |
612 |
| - if constexpr (std::is_floating_point_v<T>) { |
613 |
| - if (UNLIKELY(hasnan)) { |
614 |
| - indx_last_elem = move_nans_to_end_of_array(arr, arrsize); |
615 |
| - } |
616 |
| - } |
617 |
| - UNUSED(hasnan); |
618 |
| - if (indx_last_elem >= k) { |
619 |
| - qselect_<avx2_vector<T>, T>( |
620 |
| - arr, k, 0, indx_last_elem, 2 * (int64_t)log2(indx_last_elem)); |
621 |
| - } |
| 591 | + xss_qselect<vtype, T>(arr, k - 1, arrsize, hasnan); |
| 592 | + xss_qsort<vtype, T>(arr, k - 1); |
622 | 593 | }
|
623 | 594 |
|
624 |
/* DEFINE_METHODS(ISA, VTYPE) stamps out the public entry points for one
 * instruction-set flavor — ISA##_qsort, ISA##_qselect, ISA##_partial_qsort —
 * each a thin forwarder to the shared xss_* implementation specialized on
 * VTYPE (the vector-register wrapper for that ISA). VTYPE may reference the
 * template parameter T of the generated function, which is why the
 * instantiations below pass e.g. zmm_vector<T> textually.
 * (No // comments inside: they would swallow the line-continuation '\'.) */
#define DEFINE_METHODS(ISA, VTYPE) \
    template <typename T> \
    X86_SIMD_SORT_INLINE void ISA##_qsort(T *arr, arrsize_t size) \
    { \
        xss_qsort<VTYPE, T>(arr, size); \
    } \
    template <typename T> \
    X86_SIMD_SORT_INLINE void ISA##_qselect( \
            T *arr, arrsize_t k, arrsize_t size, bool hasnan = false) \
    { \
        xss_qselect<VTYPE, T>(arr, k, size, hasnan); \
    } \
    template <typename T> \
    X86_SIMD_SORT_INLINE void ISA##_partial_qsort( \
            T *arr, arrsize_t k, arrsize_t size, bool hasnan = false) \
    { \
        xss_partial_qsort<VTYPE, T>(arr, k, size, hasnan); \
    }

/* Public API: avx512_* uses 512-bit zmm registers, avx2_* the 256-bit path */
DEFINE_METHODS(avx512, zmm_vector<T>)
DEFINE_METHODS(avx2, avx2_vector<T>)
641 | 616 |
|
642 |
| -#endif // AVX512_QSORT_COMMON |
| 617 | +#endif // XSS_COMMON_QSORT |
0 commit comments