Skip to content

Commit e1203be

Browse files
authored
Bloom filter optimizations (2/5): Eliminate lmem access during salt lookup in arrow policy (#670)
1 parent ac4ba6b commit e1203be

File tree

1 file changed

+32
-26
lines changed

1 file changed

+32
-26
lines changed

include/cuco/detail/bloom_filter/arrow_filter_policy.cuh

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -83,10 +83,10 @@ namespace cuco::detail {
8383
template <class Key, template <typename> class XXHash64>
8484
class arrow_filter_policy {
8585
public:
86-
using hasher = XXHash64<Key>; ///< 64-bit XXHash hasher for Arrow bloom filter policy
87-
using word_type = std::uint32_t; ///< uint32_t for Arrow bloom filter policy
88-
using key_type = Key; ///< Hash function input type
89-
using hash_value_type = std::uint64_t; ///< hash function output type
86+
using hasher = XXHash64<Key>; ///< 64-bit XXHash hasher for Arrow bloom filter policy
87+
using word_type = std::uint32_t; ///< uint32_t for Arrow bloom filter policy
88+
using key_type = Key; ///< Hash function input type
89+
using hash_result_type = std::uint64_t; ///< hash function output type
9090

9191
static constexpr uint32_t bits_set_per_block = 8; ///< hardcoded bits set per Arrow filter block
9292
static constexpr uint32_t words_per_block = 8; ///< hardcoded words per Arrow filter block
@@ -99,21 +99,6 @@ class arrow_filter_policy {
9999
(max_arrow_filter_bytes /
100100
bytes_per_filter_block); ///< Max sub-filter blocks allowed in Arrow bloom filter
101101

102-
private:
103-
// Arrow's block-based bloom filter algorithm needs these eight odd SALT values to calculate
104-
// eight bit indexes, one bit to set in each 32-bit (uint32_t) word.
105-
__device__ static constexpr cuda::std::array<std::uint32_t, 8> SALT()
106-
{
107-
return {0x47b6137bU,
108-
0x44974d91U,
109-
0x8824ad5bU,
110-
0xa2b7289dU,
111-
0x705495c7U,
112-
0x2df1424bU,
113-
0x9efc4947U,
114-
0x5c6bfb31U};
115-
}
116-
117102
public:
118103
/**
119104
* @brief Constructs the `arrow_filter_policy` object.
@@ -133,7 +118,7 @@ class arrow_filter_policy {
133118
*
134119
* @return The hash value of the key
135120
*/
136-
__device__ constexpr hash_value_type hash(key_type const& key) const { return hash_(key); }
121+
__device__ constexpr hash_result_type hash(key_type const& key) const { return hash_(key); }
137122

138123
/**
139124
* @brief Determines the filter block a key is added into.
@@ -150,7 +135,7 @@ class arrow_filter_policy {
150135
* @return The block index for the given key's hash value
151136
*/
152137
template <class Extent>
153-
__device__ constexpr auto block_index(hash_value_type hash, Extent num_blocks) const
138+
__device__ constexpr auto block_index(hash_result_type hash, Extent num_blocks) const
154139
{
155140
constexpr auto hash_bits = cuda::std::numeric_limits<word_type>::digits;
156141
// TODO: assert that num_blocks <= max_filter_blocks
@@ -168,12 +153,33 @@ class arrow_filter_policy {
168153
*
169154
* @return The bit pattern for the word/segment in the filter block
170155
*/
171-
__device__ constexpr word_type word_pattern(hash_value_type hash, std::uint32_t word_index) const
156+
__device__ constexpr word_type word_pattern(hash_result_type hash, std::uint32_t word_index) const
172157
{
173-
// SALT array to calculate bit indexes for the current word
174-
auto constexpr salt = SALT();
175158
word_type const key = static_cast<word_type>(hash);
176-
return word_type{1} << ((key * salt[word_index]) >> 27);
159+
std::uint32_t salt;
160+
161+
// Equivalent to switch (word_index) { case 0-7 ... }, written as nested branches so the salt is selected without indexing an array.
162+
// First split: 0..3 versus 4..7.
163+
if (word_index < 4) {
164+
// For indices 0..3, further split into 0..1 and 2..3.
165+
if (word_index < 2) {
166+
// word_index is 0 or 1.
167+
salt = (word_index == 0) ? 0x47b6137bU : 0x44974d91U;
168+
} else {
169+
// word_index is 2 or 3.
170+
salt = (word_index == 2) ? 0x8824ad5bU : 0xa2b7289dU;
171+
}
172+
} else {
173+
// For indices 4..7, further split into 4..5 and 6..7.
174+
if (word_index < 6) {
175+
// word_index is 4 or 5.
176+
salt = (word_index == 4) ? 0x705495c7U : 0x2df1424bU;
177+
} else {
178+
// word_index is 6 or 7.
179+
salt = (word_index == 6) ? 0x9efc4947U : 0x5c6bfb31U;
180+
}
181+
}
182+
return word_type{1} << ((key * salt) >> 27);
177183
}
178184

179185
private:

0 commit comments

Comments
 (0)