Skip to content

Commit c1cf2da

Browse files
author
Raghuveer Devulapalli
committed
Use the new static methods API from x86-simd-sort
1 parent ef5f10d commit c1cf2da

File tree

3 files changed

+33
-101
lines changed

3 files changed

+33
-101
lines changed

numpy/_core/src/npysort/x86_simd_argsort.dispatch.cpp

Lines changed: 13 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,57 @@
11
#include "x86_simd_qsort.hpp"
22
#ifndef __CYGWIN__
33

4-
#if defined(NPY_HAVE_AVX512_SKX)
5-
#include "x86-simd-sort/src/avx512-64bit-argsort.hpp"
6-
#elif defined(NPY_HAVE_AVX2)
7-
#include "x86-simd-sort/src/avx2-32bit-half.hpp"
8-
#include "x86-simd-sort/src/avx2-32bit-qsort.hpp"
9-
#include "x86-simd-sort/src/avx2-64bit-qsort.hpp"
10-
#include "x86-simd-sort/src/xss-common-argsort.h"
11-
#endif
12-
13-
namespace {
14-
template<typename T>
15-
void x86_argsort(T* arr, size_t* arg, npy_intp num)
16-
{
17-
#if defined(NPY_HAVE_AVX512_SKX)
18-
avx512_argsort(arr, arg, num, true);
19-
#elif defined(NPY_HAVE_AVX2)
20-
avx2_argsort(arr, arg, num, true);
21-
#endif
22-
}
23-
24-
template<typename T>
25-
void x86_argselect(T* arr, size_t* arg, npy_intp kth, npy_intp num)
26-
{
27-
#if defined(NPY_HAVE_AVX512_SKX)
28-
avx512_argselect(arr, arg, kth, num, true);
29-
#elif defined(NPY_HAVE_AVX2)
30-
avx2_argselect(arr, arg, kth, num, true);
31-
#endif
32-
}
33-
} // anonymous
4+
#include "x86-simd-sort/src/x86simdsort-static-incl.h"
345

356
namespace np { namespace qsort_simd {
367

378
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(int32_t *arr, npy_intp* arg, npy_intp num, npy_intp kth)
389
{
39-
x86_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
10+
x86simdsortStatic::argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
4011
}
4112
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(uint32_t *arr, npy_intp* arg, npy_intp num, npy_intp kth)
4213
{
43-
x86_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
14+
x86simdsortStatic::argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
4415
}
4516
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(int64_t*arr, npy_intp* arg, npy_intp num, npy_intp kth)
4617
{
47-
x86_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
18+
x86simdsortStatic::argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
4819
}
4920
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(uint64_t*arr, npy_intp* arg, npy_intp num, npy_intp kth)
5021
{
51-
x86_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
22+
x86simdsortStatic::argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
5223
}
5324
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(float *arr, npy_intp* arg, npy_intp num, npy_intp kth)
5425
{
55-
x86_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
26+
x86simdsortStatic::argselect(arr, reinterpret_cast<size_t*>(arg), kth, num, true);
5627
}
5728
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(double *arr, npy_intp* arg, npy_intp num, npy_intp kth)
5829
{
59-
x86_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
30+
x86simdsortStatic::argselect(arr, reinterpret_cast<size_t*>(arg), kth, num, true);
6031
}
6132
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(int32_t *arr, npy_intp *arg, npy_intp size)
6233
{
63-
x86_argsort(arr, reinterpret_cast<size_t*>(arg), size);
34+
x86simdsortStatic::argsort(arr, reinterpret_cast<size_t*>(arg), size);
6435
}
6536
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(uint32_t *arr, npy_intp *arg, npy_intp size)
6637
{
67-
x86_argsort(arr, reinterpret_cast<size_t*>(arg), size);
38+
x86simdsortStatic::argsort(arr, reinterpret_cast<size_t*>(arg), size);
6839
}
6940
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(int64_t *arr, npy_intp *arg, npy_intp size)
7041
{
71-
x86_argsort(arr, reinterpret_cast<size_t*>(arg), size);
42+
x86simdsortStatic::argsort(arr, reinterpret_cast<size_t*>(arg), size);
7243
}
7344
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(uint64_t *arr, npy_intp *arg, npy_intp size)
7445
{
75-
x86_argsort(arr, reinterpret_cast<size_t*>(arg), size);
46+
x86simdsortStatic::argsort(arr, reinterpret_cast<size_t*>(arg), size);
7647
}
7748
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(float *arr, npy_intp *arg, npy_intp size)
7849
{
79-
x86_argsort(arr, reinterpret_cast<size_t*>(arg), size);
50+
x86simdsortStatic::argsort(arr, reinterpret_cast<size_t*>(arg), size, true);
8051
}
8152
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(double *arr, npy_intp *arg, npy_intp size)
8253
{
83-
x86_argsort(arr, reinterpret_cast<size_t*>(arg), size);
54+
x86simdsortStatic::argsort(arr, reinterpret_cast<size_t*>(arg), size, true);
8455
}
8556

8657
}} // namespace np::qsort_simd
numpy/_core/src/npysort/x86_simd_qsort.dispatch.cpp

Lines changed: 13 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,89 +1,57 @@
11
#include "x86_simd_qsort.hpp"
22
#ifndef __CYGWIN__
33

4-
#if defined(NPY_HAVE_AVX512_SKX)
5-
#include "x86-simd-sort/src/avx512-32bit-qsort.hpp"
6-
#include "x86-simd-sort/src/avx512-64bit-qsort.hpp"
7-
#include "x86-simd-sort/src/avx512-64bit-argsort.hpp"
8-
#elif defined(NPY_HAVE_AVX2)
9-
#include "x86-simd-sort/src/avx2-32bit-qsort.hpp"
10-
#include "x86-simd-sort/src/avx2-64bit-qsort.hpp"
11-
#endif
12-
13-
namespace {
14-
template<typename T>
15-
void x86_qsort(T* arr, npy_intp num)
16-
{
17-
#if defined(NPY_HAVE_AVX512_SKX)
18-
avx512_qsort(arr, num, true);
19-
#elif defined(NPY_HAVE_AVX2)
20-
avx2_qsort(arr, num, true);
21-
#endif
22-
}
23-
24-
template<typename T>
25-
void x86_qselect(T* arr, npy_intp num, npy_intp kth)
26-
{
27-
#if defined(NPY_HAVE_AVX512_SKX)
28-
avx512_qselect(arr, kth, num, true);
29-
#elif defined(NPY_HAVE_AVX2)
30-
avx2_qselect(arr, kth, num, true);
31-
#endif
32-
}
33-
} // anonymous
4+
#include "x86-simd-sort/src/x86simdsort-static-incl.h"
345

356
namespace np { namespace qsort_simd {
36-
#if defined(NPY_HAVE_AVX512_SKX) || defined(NPY_HAVE_AVX2)
377
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int32_t *arr, npy_intp num, npy_intp kth)
388
{
39-
x86_qselect(arr, num, kth);
9+
x86simdsortStatic::qselect(arr, kth, num);
4010
}
4111
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(uint32_t *arr, npy_intp num, npy_intp kth)
4212
{
43-
x86_qselect(arr, num, kth);
13+
x86simdsortStatic::qselect(arr, kth, num);
4414
}
4515
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int64_t*arr, npy_intp num, npy_intp kth)
4616
{
47-
x86_qselect(arr, num, kth);
17+
x86simdsortStatic::qselect(arr, kth, num);
4818
}
4919
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(uint64_t*arr, npy_intp num, npy_intp kth)
5020
{
51-
x86_qselect(arr, num, kth);
21+
x86simdsortStatic::qselect(arr, kth, num);
5222
}
5323
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(float *arr, npy_intp num, npy_intp kth)
5424
{
55-
x86_qselect(arr, num, kth);
25+
x86simdsortStatic::qselect(arr, kth, num, true);
5626
}
5727
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(double *arr, npy_intp num, npy_intp kth)
5828
{
59-
x86_qselect(arr, num, kth);
29+
x86simdsortStatic::qselect(arr, kth, num, true);
6030
}
6131
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int32_t *arr, npy_intp num)
6232
{
63-
x86_qsort(arr, num);
33+
x86simdsortStatic::qsort(arr, num);
6434
}
6535
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint32_t *arr, npy_intp num)
6636
{
67-
x86_qsort(arr, num);
37+
x86simdsortStatic::qsort(arr, num);
6838
}
6939
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int64_t *arr, npy_intp num)
7040
{
71-
x86_qsort(arr, num);
41+
x86simdsortStatic::qsort(arr, num);
7242
}
7343
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint64_t *arr, npy_intp num)
7444
{
75-
x86_qsort(arr, num);
45+
x86simdsortStatic::qsort(arr, num);
7646
}
7747
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(float *arr, npy_intp num)
7848
{
79-
x86_qsort(arr, num);
49+
x86simdsortStatic::qsort(arr, num, true);
8050
}
8151
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(double *arr, npy_intp num)
8252
{
83-
x86_qsort(arr, num);
53+
x86simdsortStatic::qsort(arr, num, true);
8454
}
85-
#endif // NPY_HAVE_AVX512_SKX || NPY_HAVE_AVX2
86-
8755
}} // namespace np::qsort_simd
8856

8957
#endif // __CYGWIN__

numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,30 @@
11
#include "x86_simd_qsort.hpp"
22
#ifndef __CYGWIN__
33

4-
#if defined(NPY_HAVE_AVX512_SPR)
5-
#include "x86-simd-sort/src/avx512fp16-16bit-qsort.hpp"
6-
#include "x86-simd-sort/src/avx512-16bit-qsort.hpp"
7-
#elif defined(NPY_HAVE_AVX512_ICL)
8-
#include "x86-simd-sort/src/avx512-16bit-qsort.hpp"
9-
#endif
4+
#include "x86-simd-sort/src/x86simdsort-static-incl.h"
105

116
namespace np { namespace qsort_simd {
127

138
/*
149
* QSelect dispatch functions:
1510
*/
16-
#if defined(NPY_HAVE_AVX512_ICL) || defined(NPY_HAVE_AVX512_SPR)
1711
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(Half *arr, npy_intp num, npy_intp kth)
1812
{
1913
#if defined(NPY_HAVE_AVX512_SPR)
20-
avx512_qselect(reinterpret_cast<_Float16*>(arr), kth, num, true);
14+
x86simdsortStatic::qselect(reinterpret_cast<_Float16*>(arr), kth, num, true);
2115
#else
2216
avx512_qselect_fp16(reinterpret_cast<uint16_t*>(arr), kth, num, true);
2317
#endif
2418
}
2519

2620
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(uint16_t *arr, npy_intp num, npy_intp kth)
2721
{
28-
avx512_qselect(arr, kth, num);
22+
x86simdsortStatic::qselect(arr, kth, num);
2923
}
3024

3125
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, npy_intp num, npy_intp kth)
3226
{
33-
avx512_qselect(arr, kth, num);
27+
x86simdsortStatic::qselect(arr, kth, num);
3428
}
3529

3630
/*
@@ -39,20 +33,19 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, npy_intp num, npy_
3933
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size)
4034
{
4135
#if defined(NPY_HAVE_AVX512_SPR)
42-
avx512_qsort(reinterpret_cast<_Float16*>(arr), size, true);
36+
x86simdsortStatic::qsort(reinterpret_cast<_Float16*>(arr), size, true);
4337
#else
4438
avx512_qsort_fp16(reinterpret_cast<uint16_t*>(arr), size, true);
4539
#endif
4640
}
4741
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, npy_intp size)
4842
{
49-
avx512_qsort(arr, size);
43+
x86simdsortStatic::qsort(arr, size);
5044
}
5145
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, npy_intp size)
5246
{
53-
avx512_qsort(arr, size);
47+
x86simdsortStatic::qsort(arr, size);
5448
}
55-
#endif // NPY_HAVE_AVX512_ICL || SPR
5649

5750
}} // namespace np::qsort_simd
5851

0 commit comments

Comments
 (0)