Skip to content

Commit 6709593

Browse files
author
Raghuveer Devulapalli
authored
Merge pull request #13 from mosullivan93/main
Implement partial sorting algorithms
2 parents 568b0e4 + a8664ed commit 6709593

22 files changed

+806
-123
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ meson:
3838
cd builddir && ninja
3939

4040
clean:
41-
$(RM) -rf $(TESTDIR)/*.o $(UTILS)/*.o testexe benchexe builddir
41+
$(RM) -rf $(TESTDIR)/*.o $(BENCHDIR)/*.o $(UTILS)/*.o testexe benchexe builddir

benchmarks/bench-qsort-common.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#ifndef AVX512_BENCH_COMMON
2+
#define AVX512_BENCH_COMMON
3+
4+
#include <benchmark/benchmark.h>
5+
#include "rand_array.h"
6+
#include "cpuinfo.h"
7+
#include "avx512-16bit-qsort.hpp"
8+
#include "avx512-32bit-qsort.hpp"
9+
#include "avx512-64bit-qsort.hpp"
10+
11+
#endif

benchmarks/bench_partial_qsort.hpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include "bench-qsort-common.h"
2+
3+
template <typename T>
4+
static void avx512_partial_qsort(benchmark::State& state) {
5+
if (!cpu_has_avx512bw()) {
6+
state.SkipWithMessage("Requires AVX512 BW ISA");
7+
}
8+
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
9+
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
10+
}
11+
// Perform setup here
12+
int64_t K = state.range(0);
13+
size_t ARRSIZE = 10000;
14+
std::vector<T> arr;
15+
std::vector<T> arr_bkp;
16+
17+
/* Initialize elements */
18+
arr = get_uniform_rand_array<T>(ARRSIZE);
19+
arr_bkp = arr;
20+
21+
/* call avx512_partial_qsort */
22+
for (auto _ : state) {
23+
avx512_partial_qsort<T>(arr.data(), K, ARRSIZE);
24+
25+
state.PauseTiming();
26+
arr = arr_bkp;
27+
state.ResumeTiming();
28+
}
29+
}
30+
31+
template <typename T>
32+
static void stdpartialsort(benchmark::State& state) {
33+
// Perform setup here
34+
int64_t K = state.range(0);
35+
size_t ARRSIZE = 10000;
36+
std::vector<T> arr;
37+
std::vector<T> arr_bkp;
38+
39+
/* Initialize elements */
40+
arr = get_uniform_rand_array<T>(ARRSIZE);
41+
arr_bkp = arr;
42+
43+
/* call std::partial_sort */
44+
for (auto _ : state) {
45+
std::partial_sort(arr.begin(), arr.begin() + K, arr.end());
46+
47+
state.PauseTiming();
48+
arr = arr_bkp;
49+
state.ResumeTiming();
50+
}
51+
}
52+
53+
// Register the partial-sort benchmarks with Google Benchmark.
// Each Arg(...) value is read by the benchmark bodies as K via state.range(0);
// the array length itself is fixed at 10000 inside those functions.
// Every avx512_partial_qsort<T> entry is paired with a stdpartialsort<T>
// entry for the same element type and the same K values.
54+
BENCHMARK(avx512_partial_qsort<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
55+
BENCHMARK(stdpartialsort<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
56+
BENCHMARK(avx512_partial_qsort<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
57+
BENCHMARK(stdpartialsort<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
58+
BENCHMARK(avx512_partial_qsort<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
59+
BENCHMARK(stdpartialsort<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
60+
61+
BENCHMARK(avx512_partial_qsort<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
62+
BENCHMARK(stdpartialsort<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
63+
BENCHMARK(avx512_partial_qsort<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
64+
BENCHMARK(stdpartialsort<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
65+
BENCHMARK(avx512_partial_qsort<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
66+
BENCHMARK(stdpartialsort<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
67+
68+
//BENCHMARK(avx512_partial_qsort<float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
69+
BENCHMARK(avx512_partial_qsort<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
70+
BENCHMARK(stdpartialsort<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
71+
BENCHMARK(avx512_partial_qsort<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
72+
BENCHMARK(stdpartialsort<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);

benchmarks/bench_qselect.hpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include "bench-qsort-common.h"
2+
3+
template <typename T>
4+
static void avx512_qselect(benchmark::State& state) {
5+
if (!cpu_has_avx512bw()) {
6+
state.SkipWithMessage("Requires AVX512 BW ISA");
7+
}
8+
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
9+
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
10+
}
11+
// Perform setup here
12+
int64_t K = state.range(0);
13+
size_t ARRSIZE = 10000;
14+
std::vector<T> arr;
15+
std::vector<T> arr_bkp;
16+
17+
/* Initialize elements */
18+
arr = get_uniform_rand_array<T>(ARRSIZE);
19+
arr_bkp = arr;
20+
21+
/* call avx512 quickselect */
22+
for (auto _ : state) {
23+
avx512_qselect<T>(arr.data(), K, ARRSIZE);
24+
25+
state.PauseTiming();
26+
arr = arr_bkp;
27+
state.ResumeTiming();
28+
}
29+
}
30+
31+
template <typename T>
32+
static void stdnthelement(benchmark::State& state) {
33+
// Perform setup here
34+
int64_t K = state.range(0);
35+
size_t ARRSIZE = 10000;
36+
std::vector<T> arr;
37+
std::vector<T> arr_bkp;
38+
39+
/* Initialize elements */
40+
arr = get_uniform_rand_array<T>(ARRSIZE);
41+
arr_bkp = arr;
42+
43+
/* call std::nth_element */
44+
for (auto _ : state) {
45+
std::nth_element(arr.begin(), arr.begin() + K, arr.end());
46+
47+
state.PauseTiming();
48+
arr = arr_bkp;
49+
state.ResumeTiming();
50+
}
51+
}
52+
53+
// Register the quickselect benchmarks with Google Benchmark.
// Each Arg(...) value is read by the benchmark bodies as K via state.range(0);
// the array length itself is fixed at 10000 inside those functions.
// Every avx512_qselect<T> entry is paired with a stdnthelement<T> entry for
// the same element type and the same K values.
54+
BENCHMARK(avx512_qselect<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
55+
BENCHMARK(stdnthelement<float>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
56+
BENCHMARK(avx512_qselect<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
57+
BENCHMARK(stdnthelement<uint32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
58+
BENCHMARK(avx512_qselect<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
59+
BENCHMARK(stdnthelement<int32_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
60+
61+
BENCHMARK(avx512_qselect<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
62+
BENCHMARK(stdnthelement<double>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
63+
BENCHMARK(avx512_qselect<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
64+
BENCHMARK(stdnthelement<uint64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
65+
BENCHMARK(avx512_qselect<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
66+
BENCHMARK(stdnthelement<int64_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
67+
68+
//BENCHMARK(avx512_qselect<float16>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
69+
BENCHMARK(avx512_qselect<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
70+
BENCHMARK(stdnthelement<uint16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
71+
BENCHMARK(avx512_qselect<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);
72+
BENCHMARK(stdnthelement<int16_t>)->Arg(10)->Arg(100)->Arg(1000)->Arg(5000);

benchmarks/bench_qsort.cpp

Lines changed: 3 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,3 @@
1-
#include <benchmark/benchmark.h>
2-
#include "rand_array.h"
3-
#include "cpuinfo.h"
4-
#include "avx512-16bit-qsort.hpp"
5-
#include "avx512-32bit-qsort.hpp"
6-
#include "avx512-64bit-qsort.hpp"
7-
8-
template <typename T>
9-
static void avx512_qsort(benchmark::State& state) {
10-
if (!cpu_has_avx512bw()) {
11-
state.SkipWithMessage("Requires AVX512 BW ISA");
12-
}
13-
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
14-
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
15-
}
16-
// Perform setup here
17-
size_t ARRSIZE = state.range(0);
18-
std::vector<T> arr;
19-
std::vector<T> arr_bkp;
20-
21-
/* Initialize elements is reverse order */
22-
arr = get_uniform_rand_array<T>(ARRSIZE);
23-
arr_bkp = arr;
24-
25-
/* call avx512 quicksort */
26-
for (auto _ : state) {
27-
avx512_qsort<T>(arr.data(), ARRSIZE);
28-
state.PauseTiming();
29-
arr = arr_bkp;
30-
state.ResumeTiming();
31-
}
32-
}
33-
34-
template <typename T>
35-
static void stdsort(benchmark::State& state) {
36-
// Perform setup here
37-
size_t ARRSIZE = state.range(0);
38-
std::vector<T> arr;
39-
std::vector<T> arr_bkp;
40-
41-
/* Initialize elements is reverse order */
42-
arr = get_uniform_rand_array<T>(ARRSIZE);
43-
arr_bkp = arr;
44-
45-
/* call avx512 quicksort */
46-
for (auto _ : state) {
47-
std::sort(arr.begin(), arr.end());
48-
state.PauseTiming();
49-
arr = arr_bkp;
50-
state.ResumeTiming();
51-
}
52-
}
53-
54-
// Register the function as a benchmark
55-
BENCHMARK(avx512_qsort<float>)->Arg(10000)->Arg(1000000);
56-
BENCHMARK(stdsort<float>)->Arg(10000)->Arg(1000000);
57-
BENCHMARK(avx512_qsort<uint32_t>)->Arg(10000)->Arg(1000000);
58-
BENCHMARK(stdsort<uint32_t>)->Arg(10000)->Arg(1000000);
59-
BENCHMARK(avx512_qsort<int32_t>)->Arg(10000)->Arg(1000000);
60-
BENCHMARK(stdsort<int32_t>)->Arg(10000)->Arg(1000000);
61-
62-
BENCHMARK(avx512_qsort<double>)->Arg(10000)->Arg(1000000);
63-
BENCHMARK(stdsort<double>)->Arg(10000)->Arg(1000000);
64-
BENCHMARK(avx512_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
65-
BENCHMARK(stdsort<uint64_t>)->Arg(10000)->Arg(1000000);
66-
BENCHMARK(avx512_qsort<int64_t>)->Arg(10000)->Arg(1000000);
67-
BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(10000000);
68-
69-
//BENCHMARK(avx512_qsort<float16>)->Arg(10000)->Arg(1000000);
70-
BENCHMARK(avx512_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
71-
BENCHMARK(stdsort<uint16_t>)->Arg(10000)->Arg(1000000);
72-
BENCHMARK(avx512_qsort<int16_t>)->Arg(10000)->Arg(1000000);
73-
BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(10000000);
1+
#include "bench_qsort.hpp"
2+
#include "bench_qselect.hpp"
3+
#include "bench_partial_qsort.hpp"

benchmarks/bench_qsort.hpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include "bench-qsort-common.h"
2+
3+
template <typename T>
4+
static void avx512_qsort(benchmark::State& state) {
5+
if (!cpu_has_avx512bw()) {
6+
state.SkipWithMessage("Requires AVX512 BW ISA");
7+
}
8+
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
9+
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
10+
}
11+
// Perform setup here
12+
size_t ARRSIZE = state.range(0);
13+
std::vector<T> arr;
14+
std::vector<T> arr_bkp;
15+
16+
/* Initialize elements */
17+
arr = get_uniform_rand_array<T>(ARRSIZE);
18+
arr_bkp = arr;
19+
20+
/* call avx512 quicksort */
21+
for (auto _ : state) {
22+
avx512_qsort<T>(arr.data(), ARRSIZE);
23+
state.PauseTiming();
24+
arr = arr_bkp;
25+
state.ResumeTiming();
26+
}
27+
}
28+
29+
template <typename T>
30+
static void stdsort(benchmark::State& state) {
31+
// Perform setup here
32+
size_t ARRSIZE = state.range(0);
33+
std::vector<T> arr;
34+
std::vector<T> arr_bkp;
35+
36+
/* Initialize elements */
37+
arr = get_uniform_rand_array<T>(ARRSIZE);
38+
arr_bkp = arr;
39+
40+
/* call std::sort */
41+
for (auto _ : state) {
42+
std::sort(arr.begin(), arr.end());
43+
state.PauseTiming();
44+
arr = arr_bkp;
45+
state.ResumeTiming();
46+
}
47+
}
48+
49+
// Register the full-sort benchmarks with Google Benchmark.
// Each Arg(...) value is read by the benchmark bodies as the array length via
// state.range(0), so every type is measured at 10000 and 1000000 elements.
// Every avx512_qsort<T> entry is paired with a stdsort<T> entry for the same
// element type and the same sizes.
50+
BENCHMARK(avx512_qsort<float>)->Arg(10000)->Arg(1000000);
51+
BENCHMARK(stdsort<float>)->Arg(10000)->Arg(1000000);
52+
BENCHMARK(avx512_qsort<uint32_t>)->Arg(10000)->Arg(1000000);
53+
BENCHMARK(stdsort<uint32_t>)->Arg(10000)->Arg(1000000);
54+
BENCHMARK(avx512_qsort<int32_t>)->Arg(10000)->Arg(1000000);
55+
BENCHMARK(stdsort<int32_t>)->Arg(10000)->Arg(1000000);
56+
57+
BENCHMARK(avx512_qsort<double>)->Arg(10000)->Arg(1000000);
58+
BENCHMARK(stdsort<double>)->Arg(10000)->Arg(1000000);
59+
BENCHMARK(avx512_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
60+
BENCHMARK(stdsort<uint64_t>)->Arg(10000)->Arg(1000000);
61+
BENCHMARK(avx512_qsort<int64_t>)->Arg(10000)->Arg(1000000);
62+
BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(1000000);
63+
64+
//BENCHMARK(avx512_qsort<float16>)->Arg(10000)->Arg(1000000);
65+
BENCHMARK(avx512_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
66+
BENCHMARK(stdsort<uint16_t>)->Arg(10000)->Arg(1000000);
67+
BENCHMARK(avx512_qsort<int16_t>)->Arg(10000)->Arg(1000000);
68+
BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(1000000);

0 commit comments

Comments
 (0)