Skip to content

Commit fac59be

Browse files
committed
Merge branch 'asio-uring-web-server' of https://github.com/ashvardanian/BenchmarkingTutorial into asio-uring-web-server
2 parents f7f7693 + 1358a69 commit fac59be

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

.vscode/settings.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"cSpell.words": [
33
"absl",
44
"Accorn",
5+
"accum",
56
"Adelstein",
67
"Andreas",
78
"ashvardanian",
@@ -42,6 +43,7 @@
4243
"Fedor",
4344
"Fugaku",
4445
"Giga",
46+
"GMMA",
4547
"Goodput",
4648
"GOPS",
4749
"GPGPU",
@@ -51,6 +53,7 @@
5153
"HKSTU",
5254
"HMMA",
5355
"Ibireme",
56+
"IMMA",
5457
"IOSQE",
5558
"JeanHeyd",
5659
"jemalloc",
@@ -109,6 +112,7 @@
109112
"sysfs",
110113
"taskset",
111114
"Tera",
115+
"Threadblock",
112116
"TMUL",
113117
"Trettner",
114118
"Unbundling",

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
9494
set(CMAKE_FIND_LIBRARY_PREFIXES ";lib")
9595
find_package(BLAS REQUIRED)
9696

97+
include(CheckFunctionExists)
98+
check_function_exists(openblas_set_num_threads LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
99+
if(LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
100+
add_definitions(-DLESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
101+
endif()
102+
97103
# GTest (required by Google Benchmark)
98104
FetchContent_Declare(
99105
GoogleTest

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ cd less_slow.cpp # Change the directo
5555

5656
sudo apt-get install build-essential cmake g++ # Install default build tools
5757
sudo apt-get install pkg-config liburing-dev # Install liburing for kernel-bypass
58+
sudo apt-get install libopenblas-base # Install numerics libraries
5859

5960
cmake -B build_release -D CMAKE_BUILD_TYPE=Release # Generate the build files
6061
cmake --build build_release --config Release # Build the project

less_slow.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2634,14 +2634,18 @@ std::size_t parse_size_string(std::string const &str) {
26342634
#pragma region Memory Bound Linear Algebra
26352635
#include <cblas.h>
26362636
/**
2637-
*! OpenBLAS defines a `SIZE` macro for internal use, which conflicts with `fmt`
2638-
*! and other code trying to use that name for variable names, so we must undefine it.
2637+
* ! OpenBLAS defines a `SIZE` macro for internal use, which conflicts with `fmt`
2638+
* ! and other code trying to use that name for variable names, so we must undefine it.
26392639
*/
26402640
#undef SIZE
26412641

26422642
template <typename scalar_type_>
26432643
static void cblas_tops(bm::State &state) {
2644+
// ! Not all versions of OpenBLAS define the `openblas_set_num_threads`
2645+
// ! symbol, so we use CMake's `CheckFunctionExists` to detect it at configure time.
2646+
#if defined(LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
26442647
openblas_set_num_threads(physical_cores());
2648+
#endif
26452649

26462650
// BLAS expects leading dimensions: `lda` = `ldb` = `ldc` = `n` for square inputs.
26472651
std::size_t n = static_cast<std::size_t>(state.range(0));
@@ -4229,13 +4233,14 @@ yyjson_alc yyjson_wrap_arena_prepend(arena_t &arena) noexcept {
42294233
*/
42304234

42314235
#if defined(__x86_64__) && defined(__linux__)
4232-
#include <asm/prctl.h> // `ARCH_ENABLE_TAGGED_ADDR`
42334236
#include <sys/syscall.h> // `SYS_arch_prctl`
42344237
static bool enable_pointer_tagging(unsigned long bits = 1) noexcept {
42354238
// The argument is the required number of tag bits.
42364239
// It is rounded up to the nearest LAM mode that can provide it.
42374240
// For now only LAM_U57 is supported, with 6 tag bits.
4238-
return syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, bits) == 0;
4241+
// ! This requires Linux kernel 6.4 or newer, where LAM support was merged.
4242+
int _ARCH_ENABLE_TAGGED_ADDR = 0x4002;
4243+
return syscall(SYS_arch_prctl, _ARCH_ENABLE_TAGGED_ADDR, bits) == 0;
42394244
}
42404245
#else
42414246
static bool enable_pointer_tagging(unsigned long = 0) noexcept { return false; }
@@ -6459,7 +6464,8 @@ class rpc_uring55_client {
64596464
static void rpc_uring55(bm::State &state, networking_route_t route, std::size_t batch_size, std::size_t packet_size) {
64606465
auto [major, minor] = fetch_linux_kernel_version();
64616466
if (major < 5 || (major == 5 && minor < 5)) {
6462-
std::string message = std::format("Kernel version {}.{} too old for io_uring 5.0 variant", major, minor);
6467+
std::string message = "Kernel version "s + std::to_string(major) + "."s + std::to_string(minor) +
6468+
" too old for io_uring 5.5 variant"s;
64636469
state.SkipWithError(message.c_str());
64646470
return;
64656471
}
@@ -6794,7 +6800,8 @@ class rpc_uring60_client {
67946800
static void rpc_uring60(bm::State &state, networking_route_t route, std::size_t batch_size, std::size_t packet_size) {
67956801
auto [major, minor] = fetch_linux_kernel_version();
67966802
if (major < 6) {
6797-
std::string message = std::format("Kernel version {}.{} too old for io_uring 6.0 variant", major, minor);
6803+
std::string message = "Kernel version "s + std::to_string(major) + "."s + std::to_string(minor) +
6804+
" too old for io_uring 6.0 variant"s;
67986805
state.SkipWithError(message.c_str());
67996806
return;
68006807
}

0 commit comments

Comments
 (0)