Skip to content

Commit a27448f

Browse files
committed
Make: USE_BLAS option
1 parent 2c44f55 commit a27448f

File tree

4 files changed

+43
-10
lines changed

4 files changed

+43
-10
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
"Klemens",
6565
"Kohlhoff",
6666
"Kulukundis",
67+
"LAPACK",
6768
"Lelbach",
6869
"Lemire",
6970
"Lib",

CMakeLists.txt

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,22 @@ else ()
7272
set(_SHOULD_USE_INTEL_TBB OFF)
7373
endif ()
7474

75+
# Probe for BLAS support
76+
set(CMAKE_FIND_LIBRARY_PREFIXES ";lib")
77+
find_package(BLAS QUIET)
78+
if (BLAS_FOUND)
79+
set(_SHOULD_USE_BLAS ON)
80+
else ()
81+
set(_SHOULD_USE_BLAS OFF)
82+
endif ()
83+
7584
option(USE_INTEL_TBB "Use Intel TBB for parallel STL algorithms" ${_SHOULD_USE_INTEL_TBB})
7685
option(USE_NVIDIA_CCCL "Use Nvidia CCCL for CUDA acceleration" ${_SHOULD_USE_NVIDIA_CCCL})
86+
option(USE_BLAS "Use BLAS for linear algebra" ${_SHOULD_USE_BLAS})
7787

7888
message(STATUS "USE_INTEL_TBB: ${USE_INTEL_TBB}")
7989
message(STATUS "USE_NVIDIA_CCCL: ${USE_NVIDIA_CCCL}")
90+
message(STATUS "USE_BLAS: ${USE_BLAS}")
8091

8192
# ------------------------------------------------------------------------------
8293
# Dependencies
@@ -102,13 +113,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
102113
# ~~~
103114
#
104115
# Moreover, CMake sometimes fails to find it on Windows: https://stackoverflow.com/a/78335726/2766161
105-
set(CMAKE_FIND_LIBRARY_PREFIXES ";lib")
106-
find_package(BLAS REQUIRED)
116+
if (USE_BLAS)
117+
find_package(BLAS REQUIRED)
107118

108-
include(CheckFunctionExists)
109-
check_function_exists(openblas_set_num_threads LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
110-
if (LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
111-
add_definitions(-DLESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
119+
include(CheckFunctionExists)
120+
check_function_exists(openblas_set_num_threads LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
121+
if (LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
122+
add_definitions(-D LESS_SLOW_HAS_OPENBLAS_SET_NUM_THREADS)
123+
endif ()
112124
endif ()
113125

114126
# GTest (required by Google Benchmark)
@@ -346,7 +358,7 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64|x64")
346358
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64")
347359
if (APPLE)
348360
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv8.6-a+bf16")
349-
endif()
361+
endif ()
350362
set_source_files_properties(less_slow_aarch64.S PROPERTIES LANGUAGE ASM)
351363
target_sources(less_slow PRIVATE less_slow_aarch64.S)
352364
endif ()
@@ -431,6 +443,7 @@ endif ()
431443
# ------------------------------------------------------------------------------
432444
target_compile_definitions(less_slow PRIVATE USE_NVIDIA_CCCL=$<BOOL:${USE_NVIDIA_CCCL}>)
433445
target_compile_definitions(less_slow PRIVATE USE_INTEL_TBB=$<BOOL:${USE_INTEL_TBB}>)
446+
target_compile_definitions(less_slow PRIVATE USE_BLAS=$<BOOL:${USE_BLAS}>)
434447
target_link_libraries(
435448
less_slow
436449
PRIVATE Threads::Threads
@@ -448,9 +461,12 @@ target_link_libraries(
448461
absl::flat_hash_map
449462
nlohmann_json::nlohmann_json
450463
Eigen3::Eigen
451-
${BLAS_LIBRARIES}
452464
)
453465

466+
if (USE_BLAS)
467+
target_link_libraries(less_slow PRIVATE ${BLAS_LIBRARIES})
468+
endif ()
469+
454470
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
455471
# target_include_directories(less_slow PRIVATE ${LIBURING_INCLUDE_DIRS})
456472
target_link_libraries(less_slow PRIVATE ${LIBURING_LIBRARIES})

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ The build will pull and compile several third-party dependencies from the source
8686
- Nvidia's [CCCL](https://github.com/nvidia/cccl) for GPU-accelerated algorithms.
8787
- Nvidia's [CUTLASS](https://github.com/nvidia/cutlass) for GPU-accelerated Linear Algebra.
8888

89-
To build without Parallel STL, Intel TBB, and CUDA:
89+
To build without Parallel STL, Intel TBB, BLAS, and CUDA:
9090

9191
```sh
92-
cmake -B build_release -D CMAKE_BUILD_TYPE=Release -D USE_INTEL_TBB=OFF -D USE_NVIDIA_CCCL=OFF
92+
cmake -B build_release -D CMAKE_BUILD_TYPE=Release -D USE_INTEL_TBB=OFF -D USE_NVIDIA_CCCL=OFF -D USE_BLAS=OFF
9393
cmake --build build_release --config Release
9494
```
9595

less_slow.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2904,6 +2904,20 @@ std::size_t parse_size_string(std::string const &str) {
29042904
#pragma endregion // Non Uniform Memory Access
29052905

29062906
#pragma region Memory Bound Linear Algebra
2907+
2908+
#if !defined(USE_BLAS)
2909+
#if defined(__has_include)
2910+
#if __has_include(<cblas.h>)
2911+
#define USE_BLAS 1
2912+
#else
2913+
#define USE_BLAS 0
2914+
#endif // __has_include(<cblas.h>)
2915+
#else
2916+
#define USE_BLAS 0
2917+
#endif // defined(__has_include)
2918+
#endif // !defined(USE_BLAS)
2919+
2920+
#if USE_BLAS
29072921
#include <cblas.h>
29082922
/**
29092923
* ! OpenBLAS defines a `SIZE` macro for internal use, which conflicts with `fmt`
@@ -2947,6 +2961,8 @@ static void cblas_tops(bm::State &state) {
29472961
BENCHMARK(cblas_tops<float>)->RangeMultiplier(2)->Range(8, 16384)->Complexity(benchmark::oNCubed);
29482962
BENCHMARK(cblas_tops<double>)->RangeMultiplier(2)->Range(8, 16384)->Complexity(benchmark::oNCubed);
29492963

2964+
#endif // USE_BLAS
2965+
29502966
/**
29512967
* Eigen is a high-level C++ library for linear algebra that provides a
29522968
* convenient templated API for matrix operations.

0 commit comments

Comments
 (0)