From 9a1a3758b488a2ee1e0f06eeccb3936362a99f77 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 26 Feb 2025 14:09:06 -0800 Subject: [PATCH 1/2] Update readme file for static methods --- src/README.md | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/src/README.md b/src/README.md index edbdc68..69d4bf1 100644 --- a/src/README.md +++ b/src/README.md @@ -1,10 +1,15 @@ # x86-simd-sort C++ header file library for SIMD based 16-bit, 32-bit and 64-bit data type -sorting algorithms on x86 processors. We currently have AVX-512 and AVX2 -(32-bit and 64-bit only) based implementation of quicksort, quickselect, -partialsort, argsort, argselect & key-value -sort. The following API's are currently supported: +sorting algorithms on x86 processors. We currently have AVX-512 and AVX2 based +implementations of quicksort, quickselect, partialsort, argsort, argselect & +key-value sort. The static methods can be used by including the +`src/x86simdsort-static-incl.h` file. Compiling them with the appropriate +compiler flags will choose either the AVX-512 or AVX2 versions. For AVX-512, we +recommend using `-march=skylake-avx512` for 32-bit and 64-bit datatypes, +`-march=icelake-client` for 16-bit datatypes and `-march=sapphirerapids` for +`_Float16`. For AVX2, just using `-mavx2` will suffice. The following APIs are +currently supported: #### Quicksort @@ -13,8 +18,7 @@ Equivalent to `qsort` in `std::sort` in [C++](https://en.cppreference.com/w/cpp/algorithm/sort). ```cpp -void avx512_qsort(T* arr, size_t arrsize, bool hasnan = false, bool descending = false); -void avx2_qsort(T* arr, size_t arrsize, bool hasnan = false, bool descending = false); +void x86simdsortStatic::qsort(T* arr, size_t arrsize, bool hasnan = false, bool descending = false); ``` Supported datatypes: `uint16_t`, `int16_t`, `_Float16`, `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and `double`. 
AVX2 versions currently support @@ -30,8 +34,7 @@ Equivalent to `std::nth_element` in ```cpp -void avx512_qselect(T* arr, size_t k, size_t arrsize, bool hasnan = false, bool descending = false); -void avx2_qselect(T* arr, size_t k, size_t arrsize, bool hasnan = false, bool descending = false); +void x86simdsortStatic::qselect(T* arr, size_t k, size_t arrsize, bool hasnan = false, bool descending = false); ``` Supported datatypes: `uint16_t`, `int16_t`, `_Float16`, `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and `double`. AVX2 versions currently support @@ -46,8 +49,7 @@ Equivalent to `std::partial_sort` in ```cpp -void avx512_partial_qsort(T* arr, size_t k, size_t arrsize, bool hasnan = false, bool descending = false) -void avx2_partial_qsort(T* arr, size_t k, size_t arrsize, bool hasnan = false, bool descending = false) +void x86simdsortStatic::partial_qsort(T* arr, size_t k, size_t arrsize, bool hasnan = false, bool descending = false) ``` Supported datatypes: `uint16_t`, `int16_t`, `_Float16`, `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and `double`. AVX2 versions currently support @@ -61,8 +63,7 @@ Equivalent to `np.argsort` in [NumPy](https://numpy.org/doc/stable/reference/generated/numpy.argsort.html). ```cpp -void avx512_argsort(T* arr, size_t *arg, size_t arrsize, bool hasnan = false, bool descending = false); -void avx2_argsort(T* arr, size_t *arg, size_t arrsize, bool hasnan = false, bool descending = false); +void x86simdsortStatic::argsort(T* arr, size_t *arg, size_t arrsize, bool hasnan = false, bool descending = false); ``` Supported datatypes: `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and `double`. @@ -74,8 +75,7 @@ Equivalent to `np.argselect` in [NumPy](https://numpy.org/doc/stable/reference/generated/numpy.argpartition.html). 
```cpp -void avx512_argselect(T* arr, size_t *arg, size_t k, size_t arrsize); -void avx2_argselect(T* arr, size_t *arg, size_t k, size_t arrsize); +void x86simdsortStatic::argselect(T* arr, size_t *arg, size_t k, size_t arrsize); ``` Supported datatypes: `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and `double`. @@ -84,10 +84,10 @@ The algorithm resorts to scalar `std::sort` if the array contains NaNs. #### Key-value sort ```cpp -void avx512_qsort_kv(T1* key, T2* value, size_t arrsize); -void avx2_qsort_kv(T1* key, T2* value, size_t arrsize); +void x86simdsortStatic::keyvalue_qsort(T1* key, T2* value, size_t arrsize); ``` -Supported datatypes: `uint64_t`, `int64_t` and `double`. +Supported datatypes: `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and +`double`. ## Algorithm details @@ -106,9 +106,7 @@ source code associated with that paper [3]. ### Sample code `main.cpp` ```cpp -#include "src/xss-common-includes.h" -#include "src/xss-common-qsort.h" -#include "src/avx512-32bit-qsort.hpp" +#include "src/x86simdsort-static-incl.h" int main() { const int ARRSIZE = 1000; @@ -120,7 +118,7 @@ int main() { } /* call avx512 quicksort */ - avx512_qsort(arr.data(), ARRSIZE); + x86simdsortStatic::qsort(arr.data(), ARRSIZE); return 0; } @@ -129,7 +127,8 @@ int main() { ### Build using g++ ``` -g++ main.cpp -mavx512f -mavx512dq -O3 +g++ main.cpp -mavx512f -mavx512dq -O3 /* for AVX-512 */ +g++ main.cpp -mavx2 -O3 /* for AVX2 */ ``` If you are using src files directly, then it is a header file only and we do @@ -142,7 +141,7 @@ to include and build this library with your source code. ## Build requirements The sorting routines relies only on the C++ Standard Library and requires a -relatively modern compiler to build (gcc 8.x and above). +relatively modern compiler to build (e.g., gcc 8.x and above). 
## Instruction set requirements From 45114c9dcb142c8691b378347362818104a0c91f Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Wed, 26 Feb 2025 21:06:04 -0800 Subject: [PATCH 2/2] Fix API signatures for keyvalue sort --- src/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/README.md b/src/README.md index 69d4bf1..87757b2 100644 --- a/src/README.md +++ b/src/README.md @@ -75,7 +75,7 @@ Equivalent to `np.argselect` in [NumPy](https://numpy.org/doc/stable/reference/generated/numpy.argpartition.html). ```cpp -void x86simdsortStatic::argselect(T* arr, size_t *arg, size_t k, size_t arrsize); +void x86simdsortStatic::argselect(T* arr, size_t *arg, size_t k, size_t arrsize, bool hasnan = false); ``` Supported datatypes: `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and `double`. @@ -84,7 +84,7 @@ The algorithm resorts to scalar `std::sort` if the array contains NaNs. #### Key-value sort ```cpp -void x86simdsortStatic::keyvalue_qsort(T1* key, T2* value, size_t arrsize); +void x86simdsortStatic::keyvalue_qsort(T1* key, T2* value, size_t arrsize, bool hasnan = false, bool descending = false); ``` Supported datatypes: `uint32_t`, `int32_t`, `float`, `uint64_t`, `int64_t` and `double`. @@ -127,7 +127,7 @@ int main() { ### Build using g++ ``` -g++ main.cpp -mavx512f -mavx512dq -O3 /* for AVX-512 */ +g++ main.cpp -mavx512f -mavx512dq -mavx512vl -O3 /* for AVX-512 */ g++ main.cpp -mavx2 -O3 /* for AVX2 */ ```