Skip to content

Commit ebf3357

Browse files
committed
Merge remote-tracking branch 'upstream/dev' into bump-rapids-cmake
2 parents c07c69b + 3e04706 commit ebf3357

27 files changed

+207
-174
lines changed

ci/build.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,16 +131,16 @@ while [ "${#args[@]}" -ne 0 ]; do
131131
esac
132132
done
133133

134+
if [ $VERBOSE ]; then
135+
set -x
136+
fi
137+
134138
# Convert to full paths:
135139
HOST_COMPILER=$(which ${HOST_COMPILER})
136140
CUDA_COMPILER=$(which ${CUDA_COMPILER})
137141
# Make CUDA arch list compatible with cmake
138142
CUDA_ARCHS=$(echo "$CUDA_ARCHS" | tr ' ,' ';;')
139143

140-
if [ $VERBOSE ]; then
141-
set -x
142-
fi
143-
144144
# Begin processing unsets after option parsing
145145
set -u
146146

@@ -217,4 +217,4 @@ if command -v sccache >/dev/null; then
217217
source "./sccache_stats.sh" end
218218
else
219219
echo "sccache stats: N/A"
220-
fi
220+
fi

include/cuco/detail/bloom_filter/bloom_filter_impl.cuh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,8 @@ class bloom_filter_impl {
183183
// If single thread is optimal, use scalar add
184184
if constexpr (worker_num_threads == 1) {
185185
for (auto i = rank; i < num_keys; i += num_threads) {
186-
typename std::iterator_traits<InputIt>::value_type const& insert_element{*(first + i)};
186+
typename cuda::std::iterator_traits<InputIt>::value_type const& insert_element{
187+
*(first + i)};
187188
this->add(insert_element);
188189
}
189190
} else if constexpr (num_threads == worker_num_threads) { // given CG is optimal CG
@@ -193,7 +194,7 @@ class bloom_filter_impl {
193194
auto const group_iters = cuco::detail::int_div_ceil(num_keys, num_threads);
194195
for (size_type i = 0; (i / num_threads) < group_iters; i += num_threads) {
195196
if (i + rank < num_keys) {
196-
typename std::iterator_traits<InputIt>::value_type const& insert_element{
197+
typename cuda::std::iterator_traits<InputIt>::value_type const& insert_element{
197198
*(first + i + rank)};
198199
hash_value = policy_.hash(insert_element);
199200
block_index = policy_.block_index(hash_value, num_blocks_);
@@ -214,7 +215,7 @@ class bloom_filter_impl {
214215

215216
for (size_type i = 0; (i / num_threads) < group_iters; i += num_threads) {
216217
if (i + rank < num_keys) {
217-
typename std::iterator_traits<InputIt>::value_type const& key{*(first + i + rank)};
218+
typename cuda::std::iterator_traits<InputIt>::value_type const& key{*(first + i + rank)};
218219
hash_value = policy_.hash(key);
219220
block_index = policy_.block_index(hash_value, num_blocks_);
220221
}

include/cuco/detail/bloom_filter/kernels.cuh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717

1818
#include <cuco/detail/utility/cuda.cuh>
1919

20+
#include <cuda/std/iterator>
21+
2022
#include <cooperative_groups.h>
2123

2224
#include <cstdint>
23-
#include <iterator>
2425

2526
namespace cuco::detail::bloom_filter_ns {
2627

@@ -66,7 +67,8 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void add_if_n(
6667

6768
while (idx < n) {
6869
if (pred(*(stencil + idx))) {
69-
typename std::iterator_traits<InputIt>::value_type const& insert_element{*(first + idx)};
70+
typename cuda::std::iterator_traits<InputIt>::value_type const& insert_element{
71+
*(first + idx)};
7072
ref.add(tile, insert_element);
7173
}
7274
idx += loop_stride;
@@ -96,14 +98,14 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void contains_if_n(InputIt first,
9698

9799
if constexpr (CGSize == 1) {
98100
while (idx < n) {
99-
typename std::iterator_traits<InputIt>::value_type const& key = *(first + idx);
101+
typename cuda::std::iterator_traits<InputIt>::value_type const& key = *(first + idx);
100102
*(out + idx) = pred(*(stencil + idx)) ? ref.contains(key) : false;
101103
idx += loop_stride;
102104
}
103105
} else {
104106
auto const tile = cg::tiled_partition<CGSize>(cg::this_thread_block());
105107
while (idx < n) {
106-
typename std::iterator_traits<InputIt>::value_type const& key = *(first + idx);
108+
typename cuda::std::iterator_traits<InputIt>::value_type const& key = *(first + idx);
107109
auto const found = pred(*(stencil + idx)) ? ref.contains(tile, key) : false;
108110
if (tile.thread_rank() == 0) { *(out + idx) = found; }
109111
idx += loop_stride;

include/cuco/detail/hash_functions/murmurhash3.cuh

Lines changed: 70 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -28,16 +28,70 @@
2828
namespace cuco::detail {
2929

3030
/**
31-
* @brief The 32bit integer finalizer hash function of `MurmurHash3`.
31+
* @brief The 32-bit integer finalizer function of `MurmurHash3`.
32+
*
33+
* This function implements the final mixing step of the `MurmurHash3` algorithm for 32-bit values.
34+
* It is designed to improve the avalanche behavior of the hash, ensuring that changes in input bits
35+
* have a more uniform effect on all output bits.
3236
*
3337
* @throw Key type must be 4 bytes in size
3438
*
35-
* @tparam Key The type of the values to hash
39+
* @tparam Key The type of the value to finalize
40+
*
41+
* @param key The input value to finalize
42+
* @param seed Optional seed value
43+
* @return The finalized 32-bit hash value
3644
*/
3745
template <typename Key>
38-
struct MurmurHash3_fmix32 {
46+
__host__ __device__ constexpr std::uint32_t fmix32(Key key, std::uint32_t seed = 0) noexcept
47+
{
3948
static_assert(sizeof(Key) == 4, "Key type must be 4 bytes in size.");
4049

50+
std::uint32_t h = static_cast<std::uint32_t>(key) ^ seed;
51+
h ^= h >> 16;
52+
h *= 0x85ebca6b;
53+
h ^= h >> 13;
54+
h *= 0xc2b2ae35;
55+
h ^= h >> 16;
56+
return h;
57+
}
58+
59+
/**
60+
* @brief The 64-bit integer finalizer function of `MurmurHash3`.
61+
*
62+
* This function implements the final mixing step of the `MurmurHash3` algorithm for 64-bit values.
63+
* It is designed to improve the avalanche behavior of the hash, ensuring that changes in input bits
64+
* have a more uniform effect on all output bits.
65+
*
66+
* @throw Key type must be 8 bytes in size
67+
*
68+
* @tparam Key The type of the value to finalize
69+
*
70+
* @param key The input value to finalize
71+
* @param seed Optional seed value
72+
* @return The finalized 64-bit hash value
73+
*/
74+
template <typename Key>
75+
__host__ __device__ constexpr std::uint64_t fmix64(Key key, std::uint64_t seed = 0) noexcept
76+
{
77+
static_assert(sizeof(Key) == 8, "Key type must be 8 bytes in size.");
78+
79+
std::uint64_t h = static_cast<std::uint64_t>(key) ^ seed;
80+
h ^= h >> 33;
81+
h *= 0xff51afd7ed558ccdULL;
82+
h ^= h >> 33;
83+
h *= 0xc4ceb9fe1a85ec53ULL;
84+
h ^= h >> 33;
85+
return h;
86+
}
87+
88+
/**
89+
* @brief The 32bit integer finalizer hash function of `MurmurHash3`.
90+
*
91+
* @tparam Key The type of the values to hash
92+
*/
93+
template <typename Key>
94+
struct MurmurHash3_fmix32 {
4195
using argument_type = Key; ///< The type of the values taken as argument
4296
using result_type = std::uint32_t; ///< The type of the hash values produced
4397

@@ -56,13 +110,7 @@ struct MurmurHash3_fmix32 {
56110
*/
57111
constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
58112
{
59-
std::uint32_t h = static_cast<std::uint32_t>(key) ^ seed_;
60-
h ^= h >> 16;
61-
h *= 0x85ebca6b;
62-
h ^= h >> 13;
63-
h *= 0xc2b2ae35;
64-
h ^= h >> 16;
65-
return h;
113+
return fmix32(key, seed_);
66114
}
67115

68116
private:
@@ -72,14 +120,10 @@ struct MurmurHash3_fmix32 {
72120
/**
73121
* @brief The 64bit integer finalizer hash function of `MurmurHash3`.
74122
*
75-
* @throw Key type must be 8 bytes in size
76-
*
77123
* @tparam Key The type of the values to hash
78124
*/
79125
template <typename Key>
80126
struct MurmurHash3_fmix64 {
81-
static_assert(sizeof(Key) == 8, "Key type must be 8 bytes in size.");
82-
83127
using argument_type = Key; ///< The type of the values taken as argument
84128
using result_type = std::uint64_t; ///< The type of the hash values produced
85129

@@ -98,13 +142,7 @@ struct MurmurHash3_fmix64 {
98142
*/
99143
constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
100144
{
101-
std::uint64_t h = static_cast<std::uint64_t>(key) ^ seed_;
102-
h ^= h >> 33;
103-
h *= 0xff51afd7ed558ccd;
104-
h ^= h >> 33;
105-
h *= 0xc4ceb9fe1a85ec53;
106-
h ^= h >> 33;
107-
return h;
145+
return fmix64(key, seed_);
108146
}
109147

110148
private:
@@ -136,7 +174,7 @@ struct MurmurHash3_32 {
136174
*
137175
* @param seed A custom number to randomize the resulting hash value
138176
*/
139-
__host__ __device__ constexpr MurmurHash3_32(std::uint32_t seed = 0) : fmix32_{0}, seed_{seed} {}
177+
__host__ __device__ constexpr MurmurHash3_32(std::uint32_t seed = 0) : seed_{seed} {}
140178

141179
/**
142180
* @brief Returns a hash value for its argument, as a value of type `result_type`.
@@ -199,7 +237,7 @@ struct MurmurHash3_32 {
199237
//----------
200238
// finalization
201239
h1 ^= size;
202-
h1 = fmix32_(h1);
240+
h1 = fmix32(h1);
203241
return h1;
204242
}
205243

@@ -224,12 +262,6 @@ struct MurmurHash3_32 {
224262
}
225263

226264
private:
227-
constexpr __host__ __device__ std::uint32_t rotl32(std::uint32_t x, std::int8_t r) const noexcept
228-
{
229-
return (x << r) | (x >> (32 - r));
230-
}
231-
232-
MurmurHash3_fmix32<std::uint32_t> fmix32_;
233265
std::uint32_t seed_;
234266
};
235267

@@ -258,10 +290,7 @@ struct MurmurHash3_x64_128 {
258290
*
259291
* @param seed A custom number to randomize the resulting hash value
260292
*/
261-
__host__ __device__ constexpr MurmurHash3_x64_128(std::uint64_t seed = 0)
262-
: fmix64_{0}, seed_{seed}
263-
{
264-
}
293+
__host__ __device__ constexpr MurmurHash3_x64_128(std::uint64_t seed = 0) : seed_{seed} {}
265294

266295
/**
267296
* @brief Returns a hash value for its argument, as a value of type `result_type`.
@@ -362,8 +391,8 @@ struct MurmurHash3_x64_128 {
362391
h1 += h2;
363392
h2 += h1;
364393

365-
h1 = fmix64_(h1);
366-
h2 = fmix64_(h2);
394+
h1 = fmix64(h1);
395+
h2 = fmix64(h2);
367396

368397
h1 += h2;
369398
h2 += h1;
@@ -392,7 +421,6 @@ struct MurmurHash3_x64_128 {
392421
}
393422

394423
private:
395-
MurmurHash3_fmix64<std::uint64_t> fmix64_;
396424
std::uint64_t seed_;
397425
};
398426

@@ -421,10 +449,7 @@ struct MurmurHash3_x86_128 {
421449
*
422450
* @param seed A custom number to randomize the resulting hash value
423451
*/
424-
__host__ __device__ constexpr MurmurHash3_x86_128(std::uint32_t seed = 0)
425-
: fmix32_{0}, seed_{seed}
426-
{
427-
}
452+
__host__ __device__ constexpr MurmurHash3_x86_128(std::uint32_t seed = 0) : seed_{seed} {}
428453

429454
/**
430455
* @brief Returns a hash value for its argument, as a value of type `result_type`.
@@ -573,10 +598,10 @@ struct MurmurHash3_x86_128 {
573598
h3 += h1;
574599
h4 += h1;
575600

576-
h1 = fmix32_(h1);
577-
h2 = fmix32_(h2);
578-
h3 = fmix32_(h3);
579-
h4 = fmix32_(h4);
601+
h1 = fmix32(h1);
602+
h2 = fmix32(h2);
603+
h3 = fmix32(h3);
604+
h4 = fmix32(h4);
580605

581606
h1 += h2;
582607
h1 += h3;
@@ -609,7 +634,6 @@ struct MurmurHash3_x86_128 {
609634
}
610635

611636
private:
612-
MurmurHash3_fmix32<std::uint32_t> fmix32_;
613637
std::uint32_t seed_;
614638
};
615639

include/cuco/detail/open_addressing/functors.cuh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
1818
#include <cuco/detail/bitwise_compare.cuh>
1919
#include <cuco/detail/pair/traits.hpp>
2020

21+
#include <thrust/tuple.h>
22+
2123
namespace cuco::detail::open_addressing_ns {
2224

2325
/**
@@ -49,7 +51,7 @@ struct get_slot {
4951
auto const intra_idx = idx % StorageRef::bucket_size;
5052
if constexpr (HasPayload) {
5153
auto const [first, second] = storage_[bucket_idx][intra_idx];
52-
return thrust::make_tuple(first, second);
54+
return thrust::tuple{first, second};
5355
} else {
5456
return storage_[bucket_idx][intra_idx];
5557
}

0 commit comments

Comments
 (0)