Skip to content

Commit 4454de4

Browse files
authored
Disable --expt-relaxed-constexpr with CCCL enhancements (#595)
With CCCL now more mature, building cuCollections with `--expt-relaxed-constexpr` is no longer necessary. This PR updates the implementations so that they compile with `--expt-relaxed-constexpr` disabled.
1 parent b55e38d commit 4454de4

File tree

17 files changed

+96
-87
lines changed

17 files changed

+96
-87
lines changed

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ function(ConfigureBench BENCH_NAME)
3535
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
3636
target_include_directories(${BENCH_NAME} PRIVATE
3737
"${CMAKE_CURRENT_SOURCE_DIR}")
38-
target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr -lineinfo)
38+
target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda -lineinfo)
3939
target_link_libraries(${BENCH_NAME} PRIVATE
4040
nvbench::main
4141
pthread

benchmarks/hash_function/hash_function_bench.cu

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121

2222
#include <nvbench/nvbench.cuh>
2323

24+
#include <cuda/std/cstddef>
2425
#include <thrust/device_vector.h>
2526

26-
#include <cstddef>
2727
#include <cstdint>
2828
#include <type_traits>
2929

@@ -139,8 +139,8 @@ __global__ void string_hash_bench_kernel(
139139
template <typename Hash>
140140
void string_hash_eval(nvbench::state& state, nvbench::type_list<Hash>)
141141
{
142-
static_assert(std::is_same_v<typename Hash::argument_type, std::byte>,
143-
"Argument type must be std::byte");
142+
static_assert(std::is_same_v<typename Hash::argument_type, cuda::std::byte>,
143+
"Argument type must be cuda::std::byte");
144144

145145
bool const materialize_result = false;
146146
constexpr auto block_size = 128;
@@ -164,7 +164,7 @@ void string_hash_eval(nvbench::state& state, nvbench::type_list<Hash>)
164164
: 1);
165165

166166
state.add_element_count(num_keys);
167-
// state.add_global_memory_reads<std::byte>(storage.size() * n_repeats);
167+
// state.add_global_memory_reads<cuda::std::byte>(storage.size() * n_repeats);
168168

169169
state.exec([&](nvbench::launch& launch) {
170170
string_hash_bench_kernel<block_size><<<grid_size, block_size, 0, launch.get_stream()>>>(
@@ -196,12 +196,13 @@ NVBENCH_BENCH_TYPES(
196196
.set_max_noise(cuco::benchmark::defaults::MAX_NOISE)
197197
.add_int64_axis("NumInputs", {cuco::benchmark::defaults::N * 10});
198198

199-
NVBENCH_BENCH_TYPES(string_hash_eval,
200-
NVBENCH_TYPE_AXES(nvbench::type_list<cuco::murmurhash3_32<std::byte>,
201-
cuco::xxhash_32<std::byte>,
202-
cuco::xxhash_64<std::byte>,
203-
cuco::murmurhash3_x86_128<std::byte>,
204-
cuco::murmurhash3_x64_128<std::byte>>))
199+
NVBENCH_BENCH_TYPES(
200+
string_hash_eval,
201+
NVBENCH_TYPE_AXES(nvbench::type_list<cuco::murmurhash3_32<cuda::std::byte>,
202+
cuco::xxhash_32<cuda::std::byte>,
203+
cuco::xxhash_64<cuda::std::byte>,
204+
cuco::murmurhash3_x86_128<cuda::std::byte>,
205+
cuco::murmurhash3_x64_128<cuda::std::byte>>))
205206
.set_name("string_hash_function_eval")
206207
.set_type_axes_names({"Hash"})
207208
.set_max_noise(cuco::benchmark::defaults::MAX_NOISE)

examples/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#=============================================================================
2-
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
2+
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -25,7 +25,7 @@ function(ConfigureExample EXAMPLE_NAME EXAMPLE_SRC)
2525
target_include_directories(${EXAMPLE_NAME} PRIVATE
2626
"${CMAKE_CURRENT_SOURCE_DIR}")
2727
target_compile_options(${EXAMPLE_NAME} PRIVATE --compiler-options=-Wall --compiler-options=-Wextra
28-
--expt-extended-lambda --expt-relaxed-constexpr -Xcompiler -Wno-subobject-linkage)
28+
--expt-extended-lambda -Xcompiler -Wno-subobject-linkage)
2929
target_link_libraries(${EXAMPLE_NAME} PRIVATE cuco CUDA::cudart)
3030
endfunction(ConfigureExample)
3131

examples/static_set/device_subsets_example.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ using ref_type = cuco::static_set_ref<key_type,
6464
storage_ref_type>; ///< Set ref type
6565

6666
/// Sample data to insert and query
67-
__device__ constexpr std::array<key_type, N> data = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
67+
__device__ constexpr cuda::std::array<key_type, N> data = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
6868
/// Empty slots are represented by reserved "sentinel" values. These values should be selected such
6969
/// that they never occur in your input data.
7070
key_type constexpr empty_key_sentinel = -1;

include/cuco/detail/__config

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@
2525
#error "NVCC version 11.5 or later is required"
2626
#endif
2727

28-
#if !defined(__CUDACC_RELAXED_CONSTEXPR__)
29-
#error "Support for relaxed constexpr is required (nvcc flag --expt-relaxed-constexpr)"
30-
#endif
31-
3228
#if !defined(__CUDACC_EXTENDED_LAMBDA__)
3329
#error "Support for extended device lambdas is required (nvcc flag --expt-extended-lambda)"
3430
#endif

include/cuco/detail/hash_functions/murmurhash3.cuh

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
#include <cuco/extent.cuh>
2121

2222
#include <cuda/std/array>
23+
#include <cuda/std/cstddef>
2324
#include <cuda/std/type_traits>
2425

25-
#include <cstddef>
2626
#include <cstdint>
2727

2828
namespace cuco::detail {
@@ -146,7 +146,7 @@ struct MurmurHash3_32 {
146146
*/
147147
constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
148148
{
149-
return compute_hash(reinterpret_cast<std::byte const*>(&key),
149+
return compute_hash(reinterpret_cast<cuda::std::byte const*>(&key),
150150
cuco::extent<std::size_t, sizeof(Key)>{});
151151
}
152152

@@ -160,7 +160,7 @@ struct MurmurHash3_32 {
160160
* @return The resulting hash value
161161
*/
162162
template <typename Extent>
163-
constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
163+
constexpr result_type __host__ __device__ compute_hash(cuda::std::byte const* bytes,
164164
Extent size) const noexcept
165165
{
166166
auto const nblocks = size / 4;
@@ -183,10 +183,14 @@ struct MurmurHash3_32 {
183183
// tail
184184
std::uint32_t k1 = 0;
185185
switch (size & 3) {
186-
case 3: k1 ^= std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 2]) << 16; [[fallthrough]];
187-
case 2: k1 ^= std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 1]) << 8; [[fallthrough]];
186+
case 3:
187+
k1 ^= cuda::std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 2]) << 16;
188+
[[fallthrough]];
189+
case 2:
190+
k1 ^= cuda::std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 1]) << 8;
191+
[[fallthrough]];
188192
case 1:
189-
k1 ^= std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 0]);
193+
k1 ^= cuda::std::to_integer<std::uint32_t>(bytes[nblocks * 4 + 0]);
190194
k1 *= c1;
191195
k1 = rotl32(k1, 15);
192196
k1 *= c2;
@@ -247,7 +251,7 @@ struct MurmurHash3_x64_128 {
247251
*/
248252
constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
249253
{
250-
return compute_hash(reinterpret_cast<std::byte const*>(&key),
254+
return compute_hash(reinterpret_cast<cuda::std::byte const*>(&key),
251255
cuco::extent<std::size_t, sizeof(Key)>{});
252256
}
253257

@@ -261,7 +265,7 @@ struct MurmurHash3_x64_128 {
261265
* @return The resulting hash value
262266
*/
263267
template <typename Extent>
264-
constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
268+
constexpr result_type __host__ __device__ compute_hash(cuda::std::byte const* bytes,
265269
Extent size) const noexcept
266270
{
267271
constexpr std::uint32_t block_size = 16;
@@ -390,7 +394,7 @@ struct MurmurHash3_x86_128 {
390394
*/
391395
constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
392396
{
393-
return compute_hash(reinterpret_cast<std::byte const*>(&key),
397+
return compute_hash(reinterpret_cast<cuda::std::byte const*>(&key),
394398
cuco::extent<std::size_t, sizeof(Key)>{});
395399
}
396400

@@ -404,7 +408,7 @@ struct MurmurHash3_x86_128 {
404408
* @return The resulting hash value
405409
*/
406410
template <typename Extent>
407-
constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
411+
constexpr result_type __host__ __device__ compute_hash(cuda::std::byte const* bytes,
408412
Extent size) const noexcept
409413
{
410414
constexpr std::uint32_t block_size = 16;

include/cuco/detail/hash_functions/utils.cuh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -16,12 +16,14 @@
1616

1717
#pragma once
1818

19+
#include <cuda/std/cstddef>
20+
1921
namespace cuco::detail {
2022

2123
template <typename T, typename U, typename Extent>
2224
constexpr __host__ __device__ T load_chunk(U const* const data, Extent index) noexcept
2325
{
24-
auto const bytes = reinterpret_cast<std::byte const*>(data);
26+
auto const bytes = reinterpret_cast<cuda::std::byte const*>(data);
2527
T chunk;
2628
memcpy(&chunk, bytes + index * sizeof(T), sizeof(T));
2729
return chunk;
@@ -37,4 +39,4 @@ constexpr __host__ __device__ std::uint64_t rotl64(std::uint64_t x, std::int8_t
3739
return (x << r) | (x >> (64 - r));
3840
}
3941

40-
}; // namespace cuco::detail
42+
}; // namespace cuco::detail

include/cuco/detail/hash_functions/xxhash.cuh

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
#include <cuco/detail/hash_functions/utils.cuh>
2020
#include <cuco/extent.cuh>
2121

22-
#include <cstddef>
22+
#include <cuda/std/cstddef>
23+
2324
#include <cstdint>
2425

2526
namespace cuco::detail {
@@ -91,10 +92,10 @@ struct XXHash_32 {
9192
{
9293
if constexpr (sizeof(Key) <= 16) {
9394
Key const key_copy = key;
94-
return compute_hash(reinterpret_cast<std::byte const*>(&key_copy),
95+
return compute_hash(reinterpret_cast<cuda::std::byte const*>(&key_copy),
9596
cuco::extent<std::size_t, sizeof(Key)>{});
9697
} else {
97-
return compute_hash(reinterpret_cast<std::byte const*>(&key),
98+
return compute_hash(reinterpret_cast<cuda::std::byte const*>(&key),
9899
cuco::extent<std::size_t, sizeof(Key)>{});
99100
}
100101
}
@@ -109,7 +110,7 @@ struct XXHash_32 {
109110
* @return The resulting hash value
110111
*/
111112
template <typename Extent>
112-
constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
113+
constexpr result_type __host__ __device__ compute_hash(cuda::std::byte const* bytes,
113114
Extent size) const noexcept
114115
{
115116
std::size_t offset = 0;
@@ -159,7 +160,7 @@ struct XXHash_32 {
159160
// the following loop is only needed if the size of the key is not a multiple of the block size
160161
if (size % 4) {
161162
while (offset < size) {
162-
h32 += (std::to_integer<std::uint32_t>(bytes[offset]) & 255) * prime5;
163+
h32 += (cuda::std::to_integer<std::uint32_t>(bytes[offset]) & 255) * prime5;
163164
h32 = rotl32(h32, 11) * prime1;
164165
++offset;
165166
}
@@ -254,10 +255,10 @@ struct XXHash_64 {
254255
{
255256
if constexpr (sizeof(Key) <= 16) {
256257
Key const key_copy = key;
257-
return compute_hash(reinterpret_cast<std::byte const*>(&key_copy),
258+
return compute_hash(reinterpret_cast<cuda::std::byte const*>(&key_copy),
258259
cuco::extent<std::size_t, sizeof(Key)>{});
259260
} else {
260-
return compute_hash(reinterpret_cast<std::byte const*>(&key),
261+
return compute_hash(reinterpret_cast<cuda::std::byte const*>(&key),
261262
cuco::extent<std::size_t, sizeof(Key)>{});
262263
}
263264
}
@@ -272,7 +273,7 @@ struct XXHash_64 {
272273
* @return The resulting hash value
273274
*/
274275
template <typename Extent>
275-
constexpr result_type __host__ __device__ compute_hash(std::byte const* bytes,
276+
constexpr result_type __host__ __device__ compute_hash(cuda::std::byte const* bytes,
276277
Extent size) const noexcept
277278
{
278279
std::size_t offset = 0;
@@ -357,7 +358,7 @@ struct XXHash_64 {
357358
// block size
358359
if (size % 4) {
359360
while (offset < size) {
360-
h64 ^= (std::to_integer<std::uint32_t>(bytes[offset]) & 0xff) * prime5;
361+
h64 ^= (cuda::std::to_integer<std::uint32_t>(bytes[offset]) & 0xff) * prime5;
361362
h64 = rotl64(h64, 11) * prime1;
362363
++offset;
363364
}

include/cuco/detail/hyperloglog/hyperloglog_ref.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -475,8 +475,8 @@ class hyperloglog_ref {
475475
cuco::sketch_size_kb sketch_size_kb) noexcept
476476
{
477477
// minimum precision is 4 or 64 bytes
478-
return std::max(static_cast<std::size_t>(sizeof(register_type) * 1ull << 4),
479-
cuda::std::bit_floor(static_cast<std::size_t>(sketch_size_kb * 1024)));
478+
return cuda::std::max(static_cast<std::size_t>(sizeof(register_type) * 1ull << 4),
479+
cuda::std::bit_floor(static_cast<std::size_t>(sketch_size_kb * 1024)));
480480
}
481481

482482
/**
@@ -493,7 +493,7 @@ class hyperloglog_ref {
493493
// https://github.com/apache/spark/blob/6a27789ad7d59cd133653a49be0bb49729542abe/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala#L43
494494

495495
// minimum precision is 4 or 64 bytes
496-
auto const precision = std::max(
496+
auto const precision = cuda::std::max(
497497
static_cast<int32_t>(4),
498498
static_cast<int32_t>(
499499
cuda::std::ceil(2.0 * cuda::std::log(1.106 / standard_deviation) / cuda::std::log(2.0))));

include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@ class open_addressing_ref_impl {
213213
*
214214
* @return The key equality predicate
215215
*/
216-
[[nodiscard]] __device__ constexpr detail::equal_wrapper<key_type, key_equal> const& predicate()
217-
const noexcept
216+
[[nodiscard]] __host__ __device__ constexpr detail::equal_wrapper<key_type, key_equal> const&
217+
predicate() const noexcept
218218
{
219219
return this->predicate_;
220220
}
@@ -255,7 +255,7 @@ class open_addressing_ref_impl {
255255
*
256256
* @return The non-owning storage ref of the container
257257
*/
258-
[[nodiscard]] __device__ constexpr storage_ref_type const& storage_ref() const noexcept
258+
[[nodiscard]] __host__ __device__ constexpr storage_ref_type const& storage_ref() const noexcept
259259
{
260260
return storage_ref_;
261261
}
@@ -1142,7 +1142,8 @@ class open_addressing_ref_impl {
11421142
* @return The key
11431143
*/
11441144
template <typename Value>
1145-
[[nodiscard]] __device__ constexpr auto const& extract_key(Value const& value) const noexcept
1145+
[[nodiscard]] __host__ __device__ constexpr auto const& extract_key(
1146+
Value const& value) const noexcept
11461147
{
11471148
if constexpr (this->has_payload) {
11481149
return thrust::raw_reference_cast(value).first;

0 commit comments

Comments
 (0)