Skip to content

Commit b8ef0bf

Browse files
authored
Use static_for for fixed-size loops (#771)
Closes #770. This PR replaces naive `for` loops with `cuda::static_for` wherever the loop size is known at compile time, which guarantees compile-time unrolling and improves runtime performance. It also updates CMake to treat device compiler warnings as errors, and it marks variables such as `gen` as `[[maybe_unused]]` to silence spurious "declared but unused" warnings, which appear to be caused by a compiler bug in CUDA 12.0.
1 parent 0c113fc commit b8ef0bf

37 files changed

+417
-195
lines changed

CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,30 @@ target_compile_features(cuco INTERFACE cxx_std_17 cuda_std_17)
9494
option(CUCO_DOWNLOAD_ROARING_TESTDATA "Download RoaringFormatSpec test data" ON)
9595
include(${CMAKE_CURRENT_LIST_DIR}/cmake/roaring_testdata.cmake)
9696

97+
###################################################################################################
98+
# - common compile options function ---------------------------------------------------------------
99+
100+
function(cuco_set_common_compile_options target_name)
101+
# Parse optional arguments
102+
cmake_parse_arguments(CUCO_OPTS "ADD_LINEINFO" "" "" ${ARGN})
103+
104+
# Base compile options common to all targets
105+
target_compile_options(${target_name} PRIVATE
106+
--compiler-options=-Wall --compiler-options=-Wextra --compiler-options=-Werror
107+
-Wno-deprecated-gpu-targets --expt-extended-lambda -Werror=all-warnings
108+
)
109+
110+
# Add lineinfo option if requested (typically for benchmarks)
111+
if(CUCO_OPTS_ADD_LINEINFO)
112+
target_compile_options(${target_name} PRIVATE -lineinfo)
113+
endif()
114+
115+
# Add GCC-specific warning suppression only for GCC
116+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
117+
target_compile_options(${target_name} PRIVATE -Xcompiler -Wno-subobject-linkage)
118+
endif()
119+
endfunction()
120+
97121
###################################################################################################
98122
# - optionally build tests ------------------------------------------------------------------------
99123

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,7 @@ function(ConfigureBench BENCH_NAME)
2727
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
2828
target_include_directories(${BENCH_NAME} PRIVATE
2929
"${CMAKE_CURRENT_SOURCE_DIR}")
30-
target_compile_options(${BENCH_NAME} PRIVATE --compiler-options=-Wall --compiler-options=-Wextra
31-
--compiler-options=-Werror -Wno-deprecated-gpu-targets --expt-extended-lambda -lineinfo)
32-
# Add GCC-specific warning suppression only for GCC
33-
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
34-
target_compile_options(${BENCH_NAME} PRIVATE -Xcompiler -Wno-subobject-linkage)
35-
endif()
30+
cuco_set_common_compile_options(${BENCH_NAME} ADD_LINEINFO)
3631
target_link_libraries(${BENCH_NAME} PRIVATE
3732
nvbench::main
3833
pthread

benchmarks/dynamic_map/contains_bench.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_contains(
4343

4444
thrust::device_vector<Key> keys(num_keys);
4545

46-
key_generator gen{};
46+
[[maybe_unused]] key_generator gen{};
4747
gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
4848

4949
thrust::device_vector<pair_type> pairs(num_keys);

benchmarks/dynamic_map/erase_bench.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_erase(
4343

4444
thrust::device_vector<Key> keys(num_keys);
4545

46-
key_generator gen{};
46+
[[maybe_unused]] key_generator gen{};
4747
gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
4848

4949
thrust::device_vector<pair_type> pairs(num_keys);

benchmarks/dynamic_map/find_bench.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_find(
4343

4444
thrust::device_vector<Key> keys(num_keys);
4545

46-
key_generator gen{};
46+
[[maybe_unused]] key_generator gen{};
4747
gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
4848

4949
thrust::device_vector<pair_type> pairs(num_keys);

benchmarks/dynamic_map/insert_bench.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_insert(
4545

4646
thrust::device_vector<Key> keys(num_keys);
4747

48-
key_generator gen{};
48+
[[maybe_unused]] key_generator gen{};
4949
gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
5050

5151
thrust::device_vector<pair_type> pairs(num_keys);

benchmarks/dynamic_map/retrieve_all_bench.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_retrieve_all(
4242

4343
thrust::device_vector<Key> keys(num_keys);
4444

45-
key_generator gen{};
45+
[[maybe_unused]] key_generator gen{};
4646
gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
4747

4848
thrust::device_vector<pair_type> pairs(num_keys);

benchmarks/hyperloglog/hyperloglog_bench.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ template <class Estimator, class Dist>
5858

5959
thrust::device_vector<T> items(num_items);
6060

61-
key_generator gen{};
61+
[[maybe_unused]] key_generator gen{};
6262
Estimator estimator{cuco::sketch_size_kb(sketch_size_kb)};
6363
double error_sum = 0;
6464
for (std::size_t i = 0; i < num_samples; ++i) {
@@ -97,7 +97,7 @@ void hyperloglog_e2e(nvbench::state& state, nvbench::type_list<T, Dist>)
9797

9898
thrust::device_vector<T> items(num_items);
9999

100-
key_generator gen{};
100+
[[maybe_unused]] key_generator gen{};
101101
gen.generate(dist_from_state<Dist>(state), items.begin(), items.end());
102102

103103
estimator_type estimator{cuco::sketch_size_kb(sketch_size_kb)};
@@ -126,7 +126,7 @@ void hyperloglog_add(nvbench::state& state, nvbench::type_list<T, Dist>)
126126

127127
thrust::device_vector<T> items(num_items);
128128

129-
key_generator gen{};
129+
[[maybe_unused]] key_generator gen{};
130130
gen.generate(dist_from_state<Dist>(state), items.begin(), items.end());
131131

132132
state.add_element_count(num_items);

benchmarks/roaring_bitmap/contains_bench.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void roaring_bitmap_contains(nvbench::state& state, nvbench::type_list<T>)
5555

5656
thrust::device_vector<T> items(num_items);
5757

58-
key_generator gen{};
58+
[[maybe_unused]] key_generator gen{};
5959
gen.generate(distribution::unique{}, items.begin(), items.end());
6060

6161
thrust::device_vector<bool> contained(items.size(), false);

benchmarks/static_map/contains_bench.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_map_contains(
4545

4646
thrust::device_vector<Key> keys(num_keys);
4747

48-
key_generator gen{};
48+
[[maybe_unused]] key_generator gen{};
4949
gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
5050

5151
thrust::device_vector<pair_type> pairs(num_keys);

0 commit comments

Comments
 (0)