Skip to content

Commit f8d1c22

Browse files
committed
Modify partial sort benchmarking to vary k
1 parent f4bca13 commit f8d1c22

File tree

4 files changed: 64 additions and 80 deletions.

benchmarks/bench_partial_qsort.hpp

Lines changed: 23 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -9,20 +9,18 @@ static void avx512_partial_qsort(benchmark::State& state) {
99
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
1010
}
1111
// Perform setup here
12-
size_t ARRSIZE = state.range(0);
12+
int64_t K = state.range(0);
13+
size_t ARRSIZE = 10000;
1314
std::vector<T> arr;
1415
std::vector<T> arr_bkp;
1516

1617
/* Initialize elements */
1718
arr = get_uniform_rand_array<T>(ARRSIZE);
1819
arr_bkp = arr;
1920

20-
/* Choose random index to sort up until */
21-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
22-
2321
/* call avx512_partial_qsort */
2422
for (auto _ : state) {
25-
avx512_partial_qsort<T>(arr.data(), k, ARRSIZE);
23+
avx512_partial_qsort<T>(arr.data(), K, ARRSIZE);
2624

2725
state.PauseTiming();
2826
arr = arr_bkp;
@@ -33,20 +31,18 @@ static void avx512_partial_qsort(benchmark::State& state) {
3331
template <typename T>
3432
static void stdpartialsort(benchmark::State& state) {
3533
// Perform setup here
36-
size_t ARRSIZE = state.range(0);
34+
int64_t K = state.range(0);
35+
size_t ARRSIZE = 10000;
3736
std::vector<T> arr;
3837
std::vector<T> arr_bkp;
3938

4039
/* Initialize elements */
4140
arr = get_uniform_rand_array<T>(ARRSIZE);
4241
arr_bkp = arr;
4342

44-
/* Choose random index to sort up until */
45-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
46-
4743
/* call std::partial_sort */
4844
for (auto _ : state) {
49-
std::partial_sort(arr.begin(), arr.begin() + k, arr.end());
45+
std::partial_sort(arr.begin(), arr.begin() + K, arr.end());
5046

5147
state.PauseTiming();
5248
arr = arr_bkp;
@@ -55,22 +51,22 @@ static void stdpartialsort(benchmark::State& state) {
5551
}
5652

5753
// Register the function as a benchmark
58-
BENCHMARK(avx512_partial_qsort<float>)->Arg(10000)->Arg(1000000);
59-
BENCHMARK(stdpartialsort<float>)->Arg(10000)->Arg(1000000);
60-
BENCHMARK(avx512_partial_qsort<uint32_t>)->Arg(10000)->Arg(1000000);
61-
BENCHMARK(stdpartialsort<uint32_t>)->Arg(10000)->Arg(1000000);
62-
BENCHMARK(avx512_partial_qsort<int32_t>)->Arg(10000)->Arg(1000000);
63-
BENCHMARK(stdpartialsort<int32_t>)->Arg(10000)->Arg(1000000);
54+
BENCHMARK(avx512_partial_qsort<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
55+
BENCHMARK(stdpartialsort<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
56+
BENCHMARK(avx512_partial_qsort<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
57+
BENCHMARK(stdpartialsort<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
58+
BENCHMARK(avx512_partial_qsort<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
59+
BENCHMARK(stdpartialsort<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
6460

65-
BENCHMARK(avx512_partial_qsort<double>)->Arg(10000)->Arg(1000000);
66-
BENCHMARK(stdpartialsort<double>)->Arg(10000)->Arg(1000000);
67-
BENCHMARK(avx512_partial_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
68-
BENCHMARK(stdpartialsort<uint64_t>)->Arg(10000)->Arg(1000000);
69-
BENCHMARK(avx512_partial_qsort<int64_t>)->Arg(10000)->Arg(1000000);
70-
BENCHMARK(stdpartialsort<int64_t>)->Arg(10000)->Arg(10000000);
61+
BENCHMARK(avx512_partial_qsort<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
62+
BENCHMARK(stdpartialsort<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
63+
BENCHMARK(avx512_partial_qsort<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
64+
BENCHMARK(stdpartialsort<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
65+
BENCHMARK(avx512_partial_qsort<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
66+
BENCHMARK(stdpartialsort<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
7167

72-
//BENCHMARK(avx512_partial_qsort<float16>)->Arg(10000)->Arg(1000000);
73-
BENCHMARK(avx512_partial_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
74-
BENCHMARK(stdpartialsort<uint16_t>)->Arg(10000)->Arg(1000000);
75-
BENCHMARK(avx512_partial_qsort<int16_t>)->Arg(10000)->Arg(1000000);
76-
BENCHMARK(stdpartialsort<int16_t>)->Arg(10000)->Arg(10000000);
68+
//BENCHMARK(avx512_partial_qsort<float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
69+
BENCHMARK(avx512_partial_qsort<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
70+
BENCHMARK(stdpartialsort<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
71+
BENCHMARK(avx512_partial_qsort<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
72+
BENCHMARK(stdpartialsort<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);

benchmarks/bench_qselect.hpp

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,18 @@ static void avx512_qselect(benchmark::State& state) {
99
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
1010
}
1111
// Perform setup here
12-
size_t ARRSIZE = state.range(0);
12+
int64_t K = state.range(0);
13+
size_t ARRSIZE = 10000;
1314
std::vector<T> arr;
1415
std::vector<T> arr_bkp;
1516

1617
/* Initialize elements */
1718
arr = get_uniform_rand_array<T>(ARRSIZE);
1819
arr_bkp = arr;
1920

20-
/* Choose random index to make sorted */
21-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE - 1, 0).front();
22-
2321
/* call avx512 quickselect */
2422
for (auto _ : state) {
25-
avx512_qselect<T>(arr.data(), k, ARRSIZE);
23+
avx512_qselect<T>(arr.data(), K, ARRSIZE);
2624

2725
state.PauseTiming();
2826
arr = arr_bkp;
@@ -33,20 +31,18 @@ static void avx512_qselect(benchmark::State& state) {
3331
template <typename T>
3432
static void stdnthelement(benchmark::State& state) {
3533
// Perform setup here
36-
size_t ARRSIZE = state.range(0);
34+
int64_t K = state.range(0);
35+
size_t ARRSIZE = 10000;
3736
std::vector<T> arr;
3837
std::vector<T> arr_bkp;
3938

4039
/* Initialize elements */
4140
arr = get_uniform_rand_array<T>(ARRSIZE);
4241
arr_bkp = arr;
4342

44-
/* Choose random index to make sorted */
45-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE - 1, 0).front();
46-
4743
/* call std::nth_element */
4844
for (auto _ : state) {
49-
std::nth_element(arr.begin(), arr.begin() + k, arr.end());
45+
std::nth_element(arr.begin(), arr.begin() + K, arr.end());
5046

5147
state.PauseTiming();
5248
arr = arr_bkp;
@@ -55,22 +51,22 @@ static void stdnthelement(benchmark::State& state) {
5551
}
5652

5753
// Register the function as a benchmark
58-
BENCHMARK(avx512_qselect<float>)->Arg(10000)->Arg(1000000);
59-
BENCHMARK(stdnthelement<float>)->Arg(10000)->Arg(1000000);
60-
BENCHMARK(avx512_qselect<uint32_t>)->Arg(10000)->Arg(1000000);
61-
BENCHMARK(stdnthelement<uint32_t>)->Arg(10000)->Arg(1000000);
62-
BENCHMARK(avx512_qselect<int32_t>)->Arg(10000)->Arg(1000000);
63-
BENCHMARK(stdnthelement<int32_t>)->Arg(10000)->Arg(1000000);
54+
BENCHMARK(avx512_qselect<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
55+
BENCHMARK(stdnthelement<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
56+
BENCHMARK(avx512_qselect<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
57+
BENCHMARK(stdnthelement<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
58+
BENCHMARK(avx512_qselect<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
59+
BENCHMARK(stdnthelement<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
6460

65-
BENCHMARK(avx512_qselect<double>)->Arg(10000)->Arg(1000000);
66-
BENCHMARK(stdnthelement<double>)->Arg(10000)->Arg(1000000);
67-
BENCHMARK(avx512_qselect<uint64_t>)->Arg(10000)->Arg(1000000);
68-
BENCHMARK(stdnthelement<uint64_t>)->Arg(10000)->Arg(1000000);
69-
BENCHMARK(avx512_qselect<int64_t>)->Arg(10000)->Arg(1000000);
70-
BENCHMARK(stdnthelement<int64_t>)->Arg(10000)->Arg(10000000);
61+
BENCHMARK(avx512_qselect<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
62+
BENCHMARK(stdnthelement<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
63+
BENCHMARK(avx512_qselect<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
64+
BENCHMARK(stdnthelement<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
65+
BENCHMARK(avx512_qselect<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
66+
BENCHMARK(stdnthelement<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
7167

72-
//BENCHMARK(avx512_qselect<float16>)->Arg(10000)->Arg(1000000);
73-
BENCHMARK(avx512_qselect<uint16_t>)->Arg(10000)->Arg(1000000);
74-
BENCHMARK(stdnthelement<uint16_t>)->Arg(10000)->Arg(1000000);
75-
BENCHMARK(avx512_qselect<int16_t>)->Arg(10000)->Arg(1000000);
76-
BENCHMARK(stdnthelement<int16_t>)->Arg(10000)->Arg(10000000);
68+
//BENCHMARK(avx512_qselect<float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
69+
BENCHMARK(avx512_qselect<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
70+
BENCHMARK(stdnthelement<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
71+
BENCHMARK(avx512_qselect<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
72+
BENCHMARK(stdnthelement<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);

benchmarks/bench_qsort.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,10 @@ BENCHMARK(stdsort<double>)->Arg(10000)->Arg(1000000);
5959
BENCHMARK(avx512_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
6060
BENCHMARK(stdsort<uint64_t>)->Arg(10000)->Arg(1000000);
6161
BENCHMARK(avx512_qsort<int64_t>)->Arg(10000)->Arg(1000000);
62-
BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(10000000);
62+
BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(1000000);
6363

6464
//BENCHMARK(avx512_qsort<float16>)->Arg(10000)->Arg(1000000);
6565
BENCHMARK(avx512_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
6666
BENCHMARK(stdsort<uint16_t>)->Arg(10000)->Arg(1000000);
6767
BENCHMARK(avx512_qsort<int16_t>)->Arg(10000)->Arg(1000000);
68-
BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(10000000);
68+
BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(1000000);

benchmarks/bench_qsortfp16.cpp

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ template <typename T>
6666
static void avx512_qselect(benchmark::State& state) {
6767
if (cpu_has_avx512fp16()) {
6868
// Perform setup here
69-
size_t ARRSIZE = state.range(0);
69+
int64_t K = state.range(0);
70+
size_t ARRSIZE = 10000;
7071
std::vector<T> arr;
7172
std::vector<T> arr_bkp;
7273

@@ -77,12 +78,9 @@ static void avx512_qselect(benchmark::State& state) {
7778
}
7879
arr_bkp = arr;
7980

80-
/* Choose random index to make sorted */
81-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE - 1, 0).front();
82-
8381
/* call avx512 quickselect */
8482
for (auto _ : state) {
85-
avx512_qselect<T>(arr.data(), k, ARRSIZE);
83+
avx512_qselect<T>(arr.data(), K, ARRSIZE);
8684

8785
state.PauseTiming();
8886
arr = arr_bkp;
@@ -98,7 +96,8 @@ template <typename T>
9896
static void stdnthelement(benchmark::State& state) {
9997
if (cpu_has_avx512fp16()) {
10098
// Perform setup here
101-
size_t ARRSIZE = state.range(0);
99+
int64_t K = state.range(0);
100+
size_t ARRSIZE = 10000;
102101
std::vector<T> arr;
103102
std::vector<T> arr_bkp;
104103

@@ -109,12 +108,9 @@ static void stdnthelement(benchmark::State& state) {
109108
}
110109
arr_bkp = arr;
111110

112-
/* Choose random index to sort until */
113-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE - 1, 0).front();
114-
115111
/* call std::nth_element */
116112
for (auto _ : state) {
117-
std::nth_element(arr.begin(), arr.begin() + k, arr.end());
113+
std::nth_element(arr.begin(), arr.begin() + K, arr.end());
118114

119115
state.PauseTiming();
120116
arr = arr_bkp;
@@ -127,14 +123,15 @@ static void stdnthelement(benchmark::State& state) {
127123
}
128124

129125
// Register the function as a benchmark
130-
BENCHMARK(avx512_qselect<_Float16>)->Arg(10000)->Arg(1000000);
131-
BENCHMARK(stdnthelement<_Float16>)->Arg(10000)->Arg(1000000);
126+
BENCHMARK(avx512_qselect<_Float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
127+
BENCHMARK(stdnthelement<_Float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
132128

133129
template <typename T>
134130
static void avx512_partial_qsort(benchmark::State& state) {
135131
if (cpu_has_avx512fp16()) {
136132
// Perform setup here
137-
size_t ARRSIZE = state.range(0);
133+
int64_t K = state.range(0);
134+
size_t ARRSIZE = 10000;
138135
std::vector<T> arr;
139136
std::vector<T> arr_bkp;
140137

@@ -145,12 +142,9 @@ static void avx512_partial_qsort(benchmark::State& state) {
145142
}
146143
arr_bkp = arr;
147144

148-
/* Choose random index to sort up until */
149-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
150-
151145
/* call avx512_partial_qsort */
152146
for (auto _ : state) {
153-
avx512_partial_qsort<T>(arr.data(), k, ARRSIZE);
147+
avx512_partial_qsort<T>(arr.data(), K, ARRSIZE);
154148

155149
state.PauseTiming();
156150
arr = arr_bkp;
@@ -166,7 +160,8 @@ template <typename T>
166160
static void stdpartialsort(benchmark::State& state) {
167161
if (cpu_has_avx512fp16()) {
168162
// Perform setup here
169-
size_t ARRSIZE = state.range(0);
163+
int64_t K = state.range(0);
164+
size_t ARRSIZE = 10000;
170165
std::vector<T> arr;
171166
std::vector<T> arr_bkp;
172167

@@ -177,12 +172,9 @@ static void stdpartialsort(benchmark::State& state) {
177172
}
178173
arr_bkp = arr;
179174

180-
/* Choose random index to sort up until */
181-
int k = get_uniform_rand_array<int64_t>(1, ARRSIZE, 1).front();
182-
183175
/* call std::partial_sort */
184176
for (auto _ : state) {
185-
std::partial_sort(arr.begin(), arr.begin() + k, arr.end());
177+
std::partial_sort(arr.begin(), arr.begin() + K, arr.end());
186178

187179
state.PauseTiming();
188180
arr = arr_bkp;
@@ -195,5 +187,5 @@ static void stdpartialsort(benchmark::State& state) {
195187
}
196188

197189
// Register the function as a benchmark
198-
BENCHMARK(avx512_partial_qsort<_Float16>)->Arg(10000)->Arg(1000000);
199-
BENCHMARK(stdpartialsort<_Float16>)->Arg(10000)->Arg(1000000);
190+
BENCHMARK(avx512_partial_qsort<_Float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
191+
BENCHMARK(stdpartialsort<_Float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);

0 commit comments

Comments
 (0)