diff --git a/README.md b/README.md index 707344f..20a46f8 100644 --- a/README.md +++ b/README.md @@ -11,25 +11,26 @@ under `src/` directory. The following routines are currently supported: ## Sort an array of custom defined class objects (uses `O(N)` space) ``` cpp -template <typename T, typename Func> -void x86simdsort::object_qsort(T *arr, uint32_t arrsize, Func key_func) +template <typename T, typename U, typename Func> +void x86simdsort::object_qsort(T *arr, U arrsize, Func key_func) ``` `T` is any user defined struct or class and `arr` is a pointer to the first -element in the array of objects of type `T`. `Func` is a lambda function that -computes the `key` value for each object which is the metric used to sort the -objects. `Func` needs to have the following signature: +element in the array of objects of type `T`. The `arrsize` parameter can be any +32-bit or 64-bit integer type. `Func` is a lambda function that computes the +`key` value for each object which is the metric used to sort the objects. +`Func` needs to have the following signature: ```cpp [] (T obj) -> key_t { key_t key; /* compute key for obj */ return key; } ``` -Note that the return type of the key `key_t` needs to be one of the following -: `[float, uint32_t, int32_t, double, uint64_t, int64_t]`. `object_qsort` has a -space complexity of `O(N)`. Specifically, it requires `arrsize * -sizeof(key_t)` bytes to store a vector with all the keys and an additional -`arrsize * sizeof(uint32_t)` bytes to store the indexes of the object array. -For performance reasons, we support `object_qsort` only when the array size is -less than or equal to `UINT32_MAX`. An example usage of `object_qsort` is +Note that the return type of the key `key_t` needs to be one of the following: +`[float, uint32_t, int32_t, double, uint64_t, int64_t]`. `object_qsort` has a +space complexity of `O(N)`. Specifically, it requires `arrsize * sizeof(key_t)` +bytes to store a vector with all the keys and an additional `arrsize * +sizeof(uint32_t)` bytes to store the indexes of the object array. 
For +performance reasons, we recommend using `object_qsort` when the array size +is less than or equal to `UINT32_MAX`. An example usage of `object_qsort` is provided in the [examples](#Sort-an-array-of-Points-using-object_qsort) section. Refer to [section](#Performance-of-object_qsort) to get a sense of how fast this is relative to `std::sort`. diff --git a/lib/x86simdsort.h b/lib/x86simdsort.h index c79f264..2e47b6a 100644 --- a/lib/x86simdsort.h +++ b/lib/x86simdsort.h @@ -70,29 +70,32 @@ XSS_EXPORT_SYMBOL void keyvalue_partial_sort(T1 *key, bool descending = false); // sort an object -template -XSS_EXPORT_SYMBOL void object_qsort(T *arr, uint32_t arrsize, Func key_func) +template +XSS_EXPORT_SYMBOL void object_qsort(T *arr, U arrsize, Func key_func) { - /* (1) Create a vector a keys */ - using return_type_of = - typename decltype(std::function {key_func})::result_type; + static_assert(std::is_integral::value, "arrsize must be an integral type"); + static_assert(sizeof(U) == sizeof(int32_t) || sizeof(U) == sizeof(int64_t), + "arrsize must be 32 or 64 bits"); + using return_type_of = typename decltype(std::function{key_func})::result_type; + static_assert(sizeof(return_type_of) == sizeof(int32_t) || sizeof(return_type_of) == sizeof(int64_t), + "key_func return type must be 32 or 64 bits"); std::vector keys(arrsize); - for (size_t ii = 0; ii < arrsize; ++ii) { + for (U ii = 0; ii < arrsize; ++ii) { keys[ii] = key_func(arr[ii]); } /* (2) Call arg based on keys using the keyvalue sort */ - std::vector arg(arrsize); + std::vector arg(arrsize); std::iota(arg.begin(), arg.end(), 0); x86simdsort::keyvalue_qsort(keys.data(), arg.data(), arrsize); /* (3) Permute obj array in-place */ std::vector done(arrsize); - for (size_t i = 0; i < arrsize; ++i) { + for (U i = 0; i < arrsize; ++i) { if (done[i]) { continue; } done[i] = true; - size_t prev_j = i; - size_t j = arg[i]; + U prev_j = i; + U j = arg[i]; while (i != j) { std::swap(arr[prev_j], arr[j]); done[j] = true;