Skip to content

Commit 71a25f9

Browse files
author
Raghuveer Devulapalli
authored
Merge pull request #74 from r-devulap/ifunc
Build shared library with runtime ISA dispatch
2 parents 05183cb + c74bc0e commit 71a25f9

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+1543
-1948
lines changed

.github/workflows/build-numpy.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ on:
1111
jobs:
1212
NumPyMultiarrayTests:
1313

14-
runs-on: ubuntu-latest
14+
runs-on: intel-ubuntu-latest
1515

1616
steps:
1717
- name: Checkout x86-simd-sort

.github/workflows/c-cpp.yml

Lines changed: 70 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -7,17 +7,17 @@ on:
77
branches: [ "main" ]
88

99
jobs:
10-
ICX:
10+
SKL:
1111

12-
runs-on: ubuntu-latest
12+
runs-on: intel-ubuntu-latest
1313

1414
steps:
1515
- uses: actions/checkout@v3
1616

1717
- name: Install dependencies
1818
run: |
1919
sudo apt update
20-
sudo apt -y install g++-10 libgtest-dev meson curl git cmake
20+
sudo apt -y install g++-13 libgtest-dev meson curl git cmake
2121
2222
- name: Install google benchmarks
2323
run: |
@@ -29,33 +29,33 @@ jobs:
2929
3030
- name: Install Intel SDE
3131
run: |
32-
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/732268/sde-external-9.7.0-2022-05-09-lin.tar.xz
32+
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
3333
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
3434
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde
3535
3636
- name: Build
3737
env:
38-
CXX: g++-10
38+
CXX: g++-13
3939
run: |
4040
make clean
41-
meson setup --warnlevel 2 --werror --buildtype plain builddir
41+
meson setup --warnlevel 2 --werror --buildtype release builddir
4242
cd builddir
4343
ninja
4444
45-
- name: Run test suite on ICX
46-
run: sde -icx -- ./builddir/testexe
45+
- name: Run test suite on SKL
46+
run: sde -skl -- ./builddir/testexe
4747

48-
SPR:
48+
SKX:
4949

50-
runs-on: ubuntu-latest
50+
runs-on: intel-ubuntu-latest
5151

5252
steps:
5353
- uses: actions/checkout@v3
5454

5555
- name: Install dependencies
5656
run: |
5757
sudo apt update
58-
sudo apt -y install g++-12 libgtest-dev meson curl git cmake
58+
sudo apt -y install g++-13 libgtest-dev meson curl git cmake
5959
6060
- name: Install google benchmarks
6161
run: |
@@ -67,58 +67,93 @@ jobs:
6767
6868
- name: Install Intel SDE
6969
run: |
70-
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/732268/sde-external-9.7.0-2022-05-09-lin.tar.xz
70+
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
7171
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
7272
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde
7373
7474
- name: Build
7575
env:
76-
CXX: g++-12
76+
CXX: g++-13
7777
run: |
7878
make clean
79-
meson setup --warnlevel 2 --werror --buildtype plain builddir
79+
meson setup --warnlevel 2 --werror --buildtype release builddir
8080
cd builddir
8181
ninja
8282
83-
- name: Run _Float16 test suite on SPR
84-
run: sde -spr -- ./builddir/testexe --gtest_filter="*float16*"
83+
- name: Run test suite on SKX
84+
run: sde -skx -- ./builddir/testexe
8585

86-
compare-benchmarks-with-main:
87-
if: ${{ false }} # disable for now
86+
TGL:
8887

89-
runs-on: ubuntu-latest
88+
runs-on: intel-ubuntu-latest
9089

9190
steps:
9291
- uses: actions/checkout@v3
93-
with:
94-
fetch-depth: 0
95-
path: x86-simd-sort
9692

97-
- name: Specify branch name
98-
working-directory: ${{ github.workspace }}/x86-simd-sort
99-
run: git switch -c pr-branch
93+
- name: Install dependencies
94+
run: |
95+
sudo apt update
96+
sudo apt -y install g++-13 libgtest-dev meson curl git cmake
97+
98+
- name: Install google benchmarks
99+
run: |
100+
git clone https://github.com/google/benchmark.git
101+
cd benchmark
102+
cmake -E make_directory "build"
103+
cmake -E chdir "build" cmake -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_BUILD_TYPE=Release ../
104+
sudo cmake --build "build" --config Release --target install
105+
106+
- name: Install Intel SDE
107+
run: |
108+
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
109+
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
110+
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde
111+
112+
- name: Build
113+
env:
114+
CXX: g++-13
115+
run: |
116+
make clean
117+
meson setup --warnlevel 2 --werror --buildtype release builddir
118+
cd builddir
119+
ninja
120+
- name: Run test suite on TGL
121+
run: sde -tgl -- ./builddir/testexe
122+
123+
SPR:
100124

101-
- uses: actions/setup-python@v4
102-
with:
103-
python-version: '3.9'
125+
runs-on: intel-ubuntu-latest
126+
127+
steps:
128+
- uses: actions/checkout@v3
104129

105130
- name: Install dependencies
106131
run: |
107132
sudo apt update
108-
sudo apt -y install g++-12 libgtest-dev meson curl git cmake
133+
sudo apt -y install g++-13 libgtest-dev meson curl git cmake
109134
110135
- name: Install google benchmarks
111136
run: |
112137
git clone https://github.com/google/benchmark.git
113138
cd benchmark
114-
pip3 install -r tools/requirements.txt
115139
cmake -E make_directory "build"
116140
cmake -E chdir "build" cmake -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_BUILD_TYPE=Release ../
117141
sudo cmake --build "build" --config Release --target install
118142
119-
- name: Run bench-compare
120-
working-directory: ${{ github.workspace }}/x86-simd-sort
143+
- name: Install Intel SDE
144+
run: |
145+
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
146+
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
147+
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde
148+
149+
- name: Build
121150
env:
122-
CXX: g++-12
123-
GBENCH: ${{ github.workspace }}/benchmark
124-
run: bash -x scripts/branch-compare.sh avx
151+
CXX: g++-13
152+
run: |
153+
make clean
154+
meson setup --warnlevel 2 --werror --buildtype release builddir
155+
cd builddir
156+
ninja
157+
158+
- name: Run test suite on SPR
159+
run: sde -spr -- ./builddir/testexe

Makefile

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,14 @@ benchexe: $(BENCHOBJS) $(UTILOBJS)
7575

7676
.PHONY: meson
7777
meson:
78-
meson setup --warnlevel 2 --werror --buildtype plain builddir
78+
meson setup --warnlevel 2 --werror --buildtype release builddir
7979
cd builddir && ninja
8080

81+
.PHONY: mesondebug
82+
mesondebug:
83+
meson setup --warnlevel 2 --werror --buildtype debug debug
84+
cd debug && ninja
85+
8186
.PHONY: clean
8287
clean:
8388
$(RM) -rf $(TESTOBJS) $(BENCHOBJS) $(UTILOBJS) testexe benchexe builddir

_clang-format

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ KeepEmptyLinesAtTheStartOfBlocks: true
6363
MacroBlockBegin: ''
6464
MacroBlockEnd: ''
6565
MaxEmptyLinesToKeep: 1
66-
NamespaceIndentation: None
66+
NamespaceIndentation: Inner
6767
PenaltyBreakAssignment: 2
6868
PenaltyBreakBeforeFirstCallParameter: 19
6969
PenaltyBreakComment: 300
Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,5 @@
1-
#ifndef AVX512_BENCH_COMMON
2-
#define AVX512_BENCH_COMMON
3-
4-
#include "avx512-16bit-qsort.hpp"
5-
#include "avx512-32bit-qsort.hpp"
6-
#include "avx512-64bit-argsort.hpp"
7-
#include "avx512-64bit-qsort.hpp"
8-
91
#include "rand_array.h"
2+
#include "x86simdsort.h"
103
#include <benchmark/benchmark.h>
114

125
#define MY_BENCHMARK_CAPTURE(func, T, test_case_name, ...) \
@@ -18,11 +11,15 @@
1811
func<T>(st, __VA_ARGS__); \
1912
})))
2013

21-
#define BENCH(func, type) \
22-
MY_BENCHMARK_CAPTURE(func, type, smallrandom_128, 128, std::string("random")); \
23-
MY_BENCHMARK_CAPTURE(func, type, smallrandom_256, 256, std::string("random")); \
24-
MY_BENCHMARK_CAPTURE(func, type, smallrandom_512, 512, std::string("random")); \
25-
MY_BENCHMARK_CAPTURE(func, type, smallrandom_1k, 1024, std::string("random")); \
14+
#define BENCH_SORT(func, type) \
15+
MY_BENCHMARK_CAPTURE( \
16+
func, type, smallrandom_128, 128, std::string("random")); \
17+
MY_BENCHMARK_CAPTURE( \
18+
func, type, smallrandom_256, 256, std::string("random")); \
19+
MY_BENCHMARK_CAPTURE( \
20+
func, type, smallrandom_512, 512, std::string("random")); \
21+
MY_BENCHMARK_CAPTURE( \
22+
func, type, smallrandom_1k, 1024, std::string("random")); \
2623
MY_BENCHMARK_CAPTURE(func, type, random_5k, 5000, std::string("random")); \
2724
MY_BENCHMARK_CAPTURE( \
2825
func, type, random_100k, 100000, std::string("random")); \
@@ -37,4 +34,13 @@
3734
MY_BENCHMARK_CAPTURE( \
3835
func, type, reverse_10k, 10000, std::string("reverse"));
3936

40-
#endif
37+
#define BENCH_PARTIAL(func, type) \
38+
MY_BENCHMARK_CAPTURE(func, type, k10, 10000, 10); \
39+
MY_BENCHMARK_CAPTURE(func, type, k100, 10000, 100); \
40+
MY_BENCHMARK_CAPTURE(func, type, k1000, 10000, 1000); \
41+
MY_BENCHMARK_CAPTURE(func, type, k5000, 10000, 5000);
42+
43+
#include "bench-argsort.hpp"
44+
#include "bench-partial-qsort.hpp"
45+
#include "bench-qselect.hpp"
46+
#include "bench-qsort.hpp"

benchmarks/bench-argsort.hpp

Lines changed: 20 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,11 @@
1-
#include "bench-qsort-common.h"
2-
31
template <typename T>
4-
std::vector<int64_t> stdargsort(const std::vector<T> &array)
2+
std::vector<size_t> stdargsort(const std::vector<T> &array)
53
{
6-
std::vector<int64_t> indices(array.size());
4+
std::vector<size_t> indices(array.size());
75
std::iota(indices.begin(), indices.end(), 0);
86
std::sort(indices.begin(),
97
indices.end(),
10-
[&array](int64_t left, int64_t right) -> bool {
8+
[&array](size_t left, size_t right) -> bool {
119
// sort indices according to corresponding array element
1210
return array[left] < array[right];
1311
});
@@ -16,77 +14,40 @@ std::vector<int64_t> stdargsort(const std::vector<T> &array)
1614
}
1715

1816
template <typename T, class... Args>
19-
static void stdargsort(benchmark::State &state, Args &&...args)
17+
static void scalarargsort(benchmark::State &state, Args &&...args)
2018
{
19+
// get args
2120
auto args_tuple = std::make_tuple(std::move(args)...);
22-
// Perform setup here
23-
size_t ARRSIZE = std::get<0>(args_tuple);
24-
std::vector<T> arr;
25-
std::vector<int64_t> inx;
26-
21+
size_t arrsize = std::get<0>(args_tuple);
2722
std::string arrtype = std::get<1>(args_tuple);
28-
if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); }
29-
else if (arrtype == "sorted") {
30-
arr = get_uniform_rand_array<T>(ARRSIZE);
31-
std::sort(arr.begin(), arr.end());
32-
}
33-
else if (arrtype == "constant") {
34-
T temp = get_uniform_rand_array<T>(1)[0];
35-
for (size_t ii = 0; ii < ARRSIZE; ++ii) {
36-
arr.push_back(temp);
37-
}
38-
}
39-
else if (arrtype == "reverse") {
40-
arr = get_uniform_rand_array<T>(ARRSIZE);
41-
std::sort(arr.begin(), arr.end());
42-
std::reverse(arr.begin(), arr.end());
43-
}
44-
45-
/* call avx512 quicksort */
23+
// set up array
24+
std::vector<T> arr = get_array<T>(arrtype, arrsize);
25+
std::vector<size_t> inx;
26+
// benchmark
4627
for (auto _ : state) {
4728
inx = stdargsort(arr);
4829
}
4930
}
5031

5132
template <typename T, class... Args>
52-
static void avx512argsort(benchmark::State &state, Args &&...args)
33+
static void simdargsort(benchmark::State &state, Args &&...args)
5334
{
35+
// get args
5436
auto args_tuple = std::make_tuple(std::move(args)...);
55-
if (!__builtin_cpu_supports("avx512bw")) {
56-
state.SkipWithMessage("Requires AVX512 BW ISA");
57-
}
58-
// Perform setup here
59-
size_t ARRSIZE = std::get<0>(args_tuple);
60-
std::vector<T> arr;
61-
std::vector<int64_t> inx;
62-
37+
size_t arrsize = std::get<0>(args_tuple);
6338
std::string arrtype = std::get<1>(args_tuple);
64-
if (arrtype == "random") { arr = get_uniform_rand_array<T>(ARRSIZE); }
65-
else if (arrtype == "sorted") {
66-
arr = get_uniform_rand_array<T>(ARRSIZE);
67-
std::sort(arr.begin(), arr.end());
68-
}
69-
else if (arrtype == "constant") {
70-
T temp = get_uniform_rand_array<T>(1)[0];
71-
for (size_t ii = 0; ii < ARRSIZE; ++ii) {
72-
arr.push_back(temp);
73-
}
74-
}
75-
else if (arrtype == "reverse") {
76-
arr = get_uniform_rand_array<T>(ARRSIZE);
77-
std::sort(arr.begin(), arr.end());
78-
std::reverse(arr.begin(), arr.end());
79-
}
80-
81-
/* call avx512 quicksort */
39+
// set up array
40+
std::vector<T> arr = get_array<T>(arrtype, arrsize);
41+
std::vector<size_t> inx;
42+
// benchmark
8243
for (auto _ : state) {
83-
inx = avx512_argsort<T>(arr.data(), ARRSIZE);
44+
inx = x86simdsort::argsort(arr.data(), arrsize);
8445
}
8546
}
8647

8748
#define BENCH_BOTH(type) \
88-
BENCH(avx512argsort, type) \
89-
BENCH(stdargsort, type)
49+
BENCH_SORT(simdargsort, type) \
50+
BENCH_SORT(scalarargsort, type)
9051

9152
BENCH_BOTH(int64_t)
9253
BENCH_BOTH(uint64_t)

0 commit comments

Comments
 (0)