@@ -363,11 +363,11 @@ template <typename vtype1,
363
363
typename vtype2,
364
364
typename type1_t = typename vtype1::type_t ,
365
365
typename type2_t = typename vtype2::type_t >
366
- X86_SIMD_SORT_INLINE void qsort_64bit_ (type1_t *keys,
367
- type2_t *indexes,
368
- arrsize_t left,
369
- arrsize_t right,
370
- int max_iters)
366
+ X86_SIMD_SORT_INLINE void kvsort_ (type1_t *keys,
367
+ type2_t *indexes,
368
+ arrsize_t left,
369
+ arrsize_t right,
370
+ int max_iters)
371
371
{
372
372
/*
373
373
* Resort to std::sort if quicksort isn't making any progress
@@ -393,32 +393,35 @@ X86_SIMD_SORT_INLINE void qsort_64bit_(type1_t *keys,
393
393
arrsize_t pivot_index = kvpartition_unrolled<vtype1, vtype2, 4 >(
394
394
keys, indexes, left, right + 1 , pivot, &smallest, &biggest);
395
395
if (pivot != smallest) {
396
- qsort_64bit_ <vtype1, vtype2>(
396
+ kvsort_ <vtype1, vtype2>(
397
397
keys, indexes, left, pivot_index - 1 , max_iters - 1 );
398
398
}
399
399
if (pivot != biggest) {
400
- qsort_64bit_ <vtype1, vtype2>(
400
+ kvsort_ <vtype1, vtype2>(
401
401
keys, indexes, pivot_index, right, max_iters - 1 );
402
402
}
403
403
}
404
404
405
// xss_qsort_kv: key-value sort entry point that avx512_qsort_kv and
// avx2_qsort_kv forward to. full_vector / half_vector are vector-type
// templates supplied by the caller; keys and indexes are handed to kvsort_
// over the index range [0, arrsize - 1]. hasnan is only consulted when T1 is
// a floating-point type. Unlike the wrappers, hasnan has no default here.
- template <typename T1, typename T2>
405
+ template <typename T1,
406
+ typename T2,
407
+ template <typename ...>
408
+ typename full_vector,
409
+ template <typename ...>
410
+ typename half_vector>
406
411
X86_SIMD_SORT_INLINE void
407
- avx512_qsort_kv (T1 *keys, T2 *indexes, arrsize_t arrsize, bool hasnan = false )
412
+ xss_qsort_kv (T1 *keys, T2 *indexes, arrsize_t arrsize, bool hasnan)
408
413
{
409
414
// keytype: half_vector<T1> only when key and value sizes differ and T1 is
// int32_t-sized; full_vector<T1> in every other case.
using keytype =
410
415
typename std::conditional<sizeof (T1) != sizeof (T2)
411
416
&& sizeof (T1) == sizeof (int32_t ),
412
- ymm_vector <T1>,
413
- zmm_vector <T1>>::type;
417
+ half_vector <T1>,
418
+ full_vector <T1>>::type;
414
419
// valtype: same selection rule as keytype, applied to T2.
using valtype =
415
420
typename std::conditional<sizeof (T1) != sizeof (T2)
416
421
&& sizeof (T2) == sizeof (int32_t ),
417
- ymm_vector<T2>,
418
- zmm_vector<T2>>::type;
419
- /*
420
- * Enable testing the heapsort key-value sort in the CI:
421
- */
422
+ half_vector<T2>,
423
+ full_vector<T2>>::type;
424
+
422
425
// With XSS_TEST_KEYVALUE_BASE_CASE defined, maxiters is -1 and minarrsize is
// forced to true. The comment removed by this change described the macro as
// enabling CI testing of the heapsort key-value sort.
#ifdef XSS_TEST_KEYVALUE_BASE_CASE
423
426
int maxiters = -1 ;
424
427
bool minarrsize = true ;
@@ -428,57 +431,43 @@ avx512_qsort_kv(T1 *keys, T2 *indexes, arrsize_t arrsize, bool hasnan = false)
428
431
#endif // XSS_TEST_KEYVALUE_BASE_CASE
429
432
430
433
if (minarrsize) {
431
- arrsize_t nan_count = 0 ;
432
- if constexpr (xss::fp::is_floating_point_v<T1>) {
434
// NOTE(review): this swaps xss::fp::is_floating_point_v for
// std::is_floating_point_v. If the project trait accepts key types that the
// std trait does not, those keys would now take the else branch and skip the
// NaN handling below -- TODO confirm.
+ if constexpr (std::is_floating_point_v<T1>) {
435
// nan_count stays 0 unless hasnan is set; it is passed to
// replace_inf_with_nan once the sort has run.
+ arrsize_t nan_count = 0 ;
433
436
if (UNLIKELY (hasnan)) {
434
- nan_count = replace_nan_with_inf<zmm_vector<T1>>(keys, arrsize);
437
+ nan_count
438
+ = replace_nan_with_inf<full_vector<T1>>(keys, arrsize);
435
439
}
440
// NOTE(review): the iteration budget is computed inline here and `maxiters`
// is not passed, whereas the removed qsort_64bit_ call passed it. In the
// lines visible here `maxiters` is otherwise unused, so the -1 override under
// XSS_TEST_KEYVALUE_BASE_CASE looks like it no longer reaches kvsort_ --
// confirm against the elided lines 428-430.
+ kvsort_<keytype, valtype>(keys,
441
+ indexes,
442
+ 0 ,
443
+ arrsize - 1 ,
444
+ 2 * (arrsize_t )log2 (arrsize));
445
+ replace_inf_with_nan (keys, arrsize, nan_count);
436
446
}
437
447
else {
438
448
// Non-floating-point keys: hasnan is deliberately ignored.
UNUSED (hasnan);
449
// NOTE(review): same inline iteration budget as the floating-point branch;
// `maxiters` is not used here either.
+ kvsort_<keytype, valtype>(keys,
450
+ indexes,
451
+ 0 ,
452
+ arrsize - 1 ,
453
+ 2 * (arrsize_t )log2 (arrsize));
439
454
}
440
- qsort_64bit_<keytype, valtype>(keys, indexes, 0 , arrsize - 1 , maxiters);
441
- replace_inf_with_nan (keys, arrsize, nan_count);
442
455
}
443
456
}
444
457
445
458
// avx512_qsort_kv: after this change, a thin wrapper that forwards to
// xss_qsort_kv with zmm_vector as the full-width vector template and
// ymm_vector as the half-width one. hasnan defaults to false. The removed
// lines interleaved below are the start of the old avx2_qsort_kv body, which
// previously occupied this position in the file.
template <typename T1, typename T2>
446
459
X86_SIMD_SORT_INLINE void
447
- avx2_qsort_kv (T1 *keys, T2 *indexes, arrsize_t arrsize, bool hasnan = false )
460
+ avx512_qsort_kv (T1 *keys, T2 *indexes, arrsize_t arrsize, bool hasnan = false )
448
461
{
449
- using keytype =
450
- typename std::conditional<sizeof (T1) != sizeof (T2)
451
- && sizeof (T1) == sizeof (int32_t ),
452
- avx2_half_vector<T1>,
453
- avx2_vector<T1>>::type;
454
- using valtype =
455
- typename std::conditional<sizeof (T1) != sizeof (T2)
456
- && sizeof (T2) == sizeof (int32_t ),
457
- avx2_half_vector<T2>,
458
- avx2_vector<T2>>::type;
462
// All arguments are passed through unchanged.
+ xss_qsort_kv<T1, T2, zmm_vector, ymm_vector>(
463
+ keys, indexes, arrsize, hasnan);
464
+ }
459
465
460
- if (arrsize > 1 ) {
461
- if constexpr (std::is_floating_point_v<T1>) {
462
- arrsize_t nan_count = 0 ;
463
- if (UNLIKELY (hasnan)) {
464
- nan_count
465
- = replace_nan_with_inf<avx2_vector<T1>>(keys, arrsize);
466
- }
467
- qsort_64bit_<keytype, valtype>(keys,
468
- indexes,
469
- 0 ,
470
- arrsize - 1 ,
471
- 2 * (arrsize_t )log2 (arrsize));
472
- replace_inf_with_nan (keys, arrsize, nan_count);
473
- }
474
- else {
475
- UNUSED (hasnan);
476
- qsort_64bit_<keytype, valtype>(keys,
477
- indexes,
478
- 0 ,
479
- arrsize - 1 ,
480
- 2 * (arrsize_t )log2 (arrsize));
481
- }
482
- }
466
// avx2_qsort_kv: after this change, a thin wrapper that forwards to
// xss_qsort_kv with avx2_vector as the full-width vector template and
// avx2_half_vector as the half-width one. hasnan defaults to false.
+ template <typename T1, typename T2>
467
+ X86_SIMD_SORT_INLINE void
468
+ avx2_qsort_kv (T1 *keys, T2 *indexes, arrsize_t arrsize, bool hasnan = false )
469
+ {
470
// All arguments are passed through unchanged.
+ xss_qsort_kv<T1, T2, avx2_vector, avx2_half_vector>(
471
+ keys, indexes, arrsize, hasnan);
483
472
}
484
473
#endif // AVX512_QSORT_64BIT_KV
0 commit comments