|
1 | 1 | #include "bench-qsort-common.h"
|
2 | 2 |
|
3 |
| -template <typename T> |
4 |
| -static void avx512_qsort(benchmark::State& state) { |
5 |
| - if (!cpu_has_avx512bw()) { |
6 |
| - state.SkipWithMessage("Requires AVX512 BW ISA"); |
7 |
| - } |
8 |
| - if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { |
9 |
| - state.SkipWithMessage("Requires AVX512 VBMI2 ISA"); |
10 |
| - } |
| 3 | +template <typename T, class... Args> |
| 4 | +static void stdsort(benchmark::State &state, Args &&...args) |
| 5 | +{ |
| 6 | + auto args_tuple = std::make_tuple(std::move(args)...); |
11 | 7 | // Perform setup here
|
12 |
| - size_t ARRSIZE = state.range(0); |
| 8 | + size_t ARRSIZE = std::get<0>(args_tuple); |
13 | 9 | std::vector<T> arr;
|
14 | 10 | std::vector<T> arr_bkp;
|
15 | 11 |
|
16 |
| - /* Initialize elements */ |
17 |
| - arr = get_uniform_rand_array<T>(ARRSIZE); |
| 12 | + std::string arrtype = std::get<1>(args_tuple); |
| 13 | + if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); } |
| 14 | + else if (arrtype == "sorted") { |
| 15 | + arr = get_uniform_rand_array<T>(ARRSIZE); |
| 16 | + std::sort(arr.begin(), arr.end()); |
| 17 | + } |
| 18 | + else if (arrtype == "constant") { |
| 19 | + T temp = get_uniform_rand_array<T>(1)[0]; |
| 20 | + for (size_t ii = 0; ii < ARRSIZE; ++ii) { |
| 21 | + arr.push_back(temp); |
| 22 | + } |
| 23 | + } |
| 24 | + else if (arrtype == "reverse") { |
| 25 | + arr = get_uniform_rand_array<T>(ARRSIZE); |
| 26 | + std::sort(arr.begin(), arr.end()); |
| 27 | + std::reverse(arr.begin(), arr.end()); |
| 28 | + } |
18 | 29 | arr_bkp = arr;
|
19 | 30 |
|
20 | 31 | /* call avx512 quicksort */
|
21 | 32 | for (auto _ : state) {
|
22 |
| - avx512_qsort<T>(arr.data(), ARRSIZE); |
| 33 | + std::sort(arr.begin(), arr.end()); |
23 | 34 | state.PauseTiming();
|
24 | 35 | arr = arr_bkp;
|
25 | 36 | state.ResumeTiming();
|
26 | 37 | }
|
27 | 38 | }
|
28 | 39 |
|
29 |
| -template <typename T> |
30 |
| -static void stdsort(benchmark::State& state) { |
| 40 | +template <typename T, class... Args> |
| 41 | +static void avx512qsort(benchmark::State &state, Args &&...args) |
| 42 | +{ |
| 43 | + auto args_tuple = std::make_tuple(std::move(args)...); |
| 44 | + if (!cpu_has_avx512bw()) { |
| 45 | + state.SkipWithMessage("Requires AVX512 BW ISA"); |
| 46 | + } |
| 47 | + if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) { |
| 48 | + state.SkipWithMessage("Requires AVX512 VBMI2"); |
| 49 | + } |
31 | 50 | // Perform setup here
|
32 |
| - size_t ARRSIZE = state.range(0); |
| 51 | + size_t ARRSIZE = std::get<0>(args_tuple); |
33 | 52 | std::vector<T> arr;
|
34 | 53 | std::vector<T> arr_bkp;
|
35 | 54 |
|
36 |
| - /* Initialize elements */ |
37 |
| - arr = get_uniform_rand_array<T>(ARRSIZE); |
| 55 | + std::string arrtype = std::get<1>(args_tuple); |
| 56 | + if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); } |
| 57 | + else if (arrtype == "sorted") { |
| 58 | + arr = get_uniform_rand_array<T>(ARRSIZE); |
| 59 | + std::sort(arr.begin(), arr.end()); |
| 60 | + } |
| 61 | + else if (arrtype == "constant") { |
| 62 | + T temp = get_uniform_rand_array<T>(1)[0]; |
| 63 | + for (size_t ii = 0; ii < ARRSIZE; ++ii) { |
| 64 | + arr.push_back(temp); |
| 65 | + } |
| 66 | + } |
| 67 | + else if (arrtype == "reverse") { |
| 68 | + arr = get_uniform_rand_array<T>(ARRSIZE); |
| 69 | + std::sort(arr.begin(), arr.end()); |
| 70 | + std::reverse(arr.begin(), arr.end()); |
| 71 | + } |
38 | 72 | arr_bkp = arr;
|
39 | 73 |
|
40 |
| - /* call std::sort */ |
| 74 | + /* call avx512 quicksort */ |
41 | 75 | for (auto _ : state) {
|
42 |
| - std::sort(arr.begin(), arr.end()); |
| 76 | + avx512_qsort<T>(arr.data(), ARRSIZE); |
43 | 77 | state.PauseTiming();
|
44 | 78 | arr = arr_bkp;
|
45 | 79 | state.ResumeTiming();
|
46 | 80 | }
|
47 | 81 | }
|
48 | 82 |
|
49 |
| -// Register the function as a benchmark |
50 |
| -BENCHMARK(avx512_qsort<float>)->Arg(10000)->Arg(1000000); |
51 |
| -BENCHMARK(stdsort<float>)->Arg(10000)->Arg(1000000); |
52 |
| -BENCHMARK(avx512_qsort<uint32_t>)->Arg(10000)->Arg(1000000); |
53 |
| -BENCHMARK(stdsort<uint32_t>)->Arg(10000)->Arg(1000000); |
54 |
| -BENCHMARK(avx512_qsort<int32_t>)->Arg(10000)->Arg(1000000); |
55 |
| -BENCHMARK(stdsort<int32_t>)->Arg(10000)->Arg(1000000); |
56 |
| - |
57 |
| -BENCHMARK(avx512_qsort<double>)->Arg(10000)->Arg(1000000); |
58 |
| -BENCHMARK(stdsort<double>)->Arg(10000)->Arg(1000000); |
59 |
| -BENCHMARK(avx512_qsort<uint64_t>)->Arg(10000)->Arg(1000000); |
60 |
| -BENCHMARK(stdsort<uint64_t>)->Arg(10000)->Arg(1000000); |
61 |
| -BENCHMARK(avx512_qsort<int64_t>)->Arg(10000)->Arg(1000000); |
62 |
| -BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(1000000); |
| 83 | +#define BENCH_ALL(type)\ |
| 84 | + BENCH(avx512qsort, type)\ |
| 85 | + BENCH(stdsort, type) |
63 | 86 |
|
64 |
| -//BENCHMARK(avx512_qsort<float16>)->Arg(10000)->Arg(1000000); |
65 |
| -BENCHMARK(avx512_qsort<uint16_t>)->Arg(10000)->Arg(1000000); |
66 |
| -BENCHMARK(stdsort<uint16_t>)->Arg(10000)->Arg(1000000); |
67 |
| -BENCHMARK(avx512_qsort<int16_t>)->Arg(10000)->Arg(1000000); |
68 |
| -BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(1000000); |
| 87 | +BENCH_ALL(uint64_t) |
| 88 | +BENCH_ALL(int64_t) |
| 89 | +BENCH_ALL(uint32_t) |
| 90 | +BENCH_ALL(int32_t) |
| 91 | +BENCH_ALL(uint16_t) |
| 92 | +BENCH_ALL(int16_t) |
| 93 | +BENCH_ALL(float) |
| 94 | +BENCH_ALL(double) |
0 commit comments