Skip to content

Commit 0f40943

Browse files
committed
Implement set union and (approximate) intersection for bloom filters
Given two bloom filters, it is often useful to be able to compute their union and intersection. That is, suppose we have two sets A and B, and denote by f(.) the construction of a bloom filter with a given set of parameters. Then we would like to be able to compute (or approximate) f(A v B) and f(A ^ B) given only f(A) and f(B). The union operation is exact and is obtained by the bitwise OR of the fingerprints, so we have f(A v B) = f(A) v f(B). In contrast, the intersection, obtained by the bitwise AND of the fingerprints, is only approximate, so we have f(A ^ B) ~= f(A) ^ f(B). - Closes #602.
1 parent b4e5eb2 commit 0f40943

File tree

6 files changed

+282
-19
lines changed

6 files changed

+282
-19
lines changed

doxygen/Doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1508,7 +1508,7 @@ FORMULA_MACROFILE =
15081508
# The default value is: NO.
15091509
# This tag requires that the tag GENERATE_HTML is set to YES.
15101510

1511-
USE_MATHJAX = NO
1511+
USE_MATHJAX = YES
15121512

15131513
# When MathJax is enabled you can set the default output format to be used for
15141514
# the MathJax output. See the MathJax site (see:

include/cuco/bloom_filter.cuh

Lines changed: 86 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ namespace cuco {
4242
* - Host-side "bulk" operations
4343
* - Device-side "singular" operations
4444
*
45-
* The host-side bulk operations include `add`, `contains`, etc. These APIs should be used when
45+
* The host-side bulk operations include add(), contains(), etc. These APIs should be used when
4646
* there are a large number of keys to add or lookup. For example, given a range of keys
4747
* specified by device-accessible iterators, the bulk `add` function will add all keys into
4848
* the filter.
@@ -124,7 +124,7 @@ class bloom_filter {
124124
* @brief Erases all information from the filter.
125125
*
126126
* @note This function synchronizes the given stream. For asynchronous execution use
127-
* `clear_async`.
127+
* clear_async().
128128
*
129129
* @param stream CUDA stream used for device memory operations and kernel launches
130130
*/
@@ -142,7 +142,7 @@ class bloom_filter {
142142
* @brief Adds all keys in the range `[first, last)` to the filter.
143143
*
144144
* @note This function synchronizes the given stream. For asynchronous execution use
145-
* `add_async`.
145+
* add_async().
146146
*
147147
* @tparam InputIt Device-accessible random access input key iterator
148148
* @param first Beginning of the sequence of keys
@@ -173,7 +173,7 @@ class bloom_filter {
173173
*
174174
* @note The key `*(first + i)` is added if `pred( *(stencil + i) )` returns `true`.
175175
* @note This function synchronizes the given stream and returns the number of successful
176-
* insertions. For asynchronous execution use `add_if_async`.
176+
* insertions. For asynchronous execution use add_if_async().
177177
*
178178
* @tparam InputIt Device-accessible random access input key iterator
179179
* @tparam StencilIt Device-accessible random-access iterator whose `value_type` is
@@ -227,7 +227,7 @@ class bloom_filter {
227227
* filter.
228228
*
229229
* @note This function synchronizes the given stream. For asynchronous execution use
230-
* `contains_async`.
230+
* contains_async().
231231
*
232232
* @tparam InputIt Device-accessible random access input key iterator
233233
* @tparam OutputIt Device-accessible output iterator assignable from `bool`
@@ -269,7 +269,7 @@ class bloom_filter {
269269
*
270270
* @note The key `*(first + i)` is queried if `pred( *(stencil + i) )` returns `true`.
271271
* @note This function synchronizes the given stream. For asynchronous execution use
272-
* `contains_if_async`.
272+
* contains_if_async().
273273
*
274274
* @tparam InputIt Device-accessible random access input key iterator
275275
* @tparam StencilIt Device-accessible random-access iterator whose `value_type` is
@@ -325,6 +325,85 @@ class bloom_filter {
325325
cuda::stream_ref stream = cuda::stream_ref{
326326
cudaStream_t{nullptr}}) const noexcept;
327327

328+
/**
329+
* @brief Merge another bloom filter into this.
330+
*
331+
* @note Modifies `this` in place.
332+
* @note This function synchronizes the given stream. For asynchronous execution use
333+
* merge_async().
334+
*
335+
* @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
336+
* construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
337+
* then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$.
338+
*
339+
* @param other Other filter with matching type to this.
340+
* @param stream CUDA stream used for device memory operations and kernel launches.
341+
*
342+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
343+
*/
344+
__host__ constexpr void merge(bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
345+
cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
346+
347+
/**
348+
* @brief Asynchronously merge another bloom filter into this.
349+
*
350+
* @note Modifies `this` in place.
351+
*
352+
* @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
353+
* construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
354+
* then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$.
355+
*
356+
* @param other Other filter with matching type to this.
357+
* @param stream CUDA stream used for device memory operations and kernel launches.
358+
*
359+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
360+
*/
361+
__host__ constexpr void merge_async(
362+
bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
363+
cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
364+
365+
/**
366+
* @brief Intersect another bloom filter into this.
367+
*
368+
* @note Modifies `this` in place.
369+
* @note This function synchronizes the given stream. For asynchronous execution use
370+
* intersect_async().
371+
*
372+
* @note This performs the set intersection of the two filters. Unlike merge(), this operation
373+
* does not distribute over filter construction and therefore only approximates the bloom filter
374+
* of the intersection of the input sets. In other words, let \f$f : X \to B\f$ denote the
375+
* construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
376+
* then \f$f(A \cap B) \ne f(A) \cap f(B)\f$.
377+
*
378+
* @param other Other filter with matching type to this.
379+
* @param stream CUDA stream used for device memory operations and kernel launches.
380+
*
381+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
382+
*/
383+
__host__ constexpr void intersect(
384+
bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
385+
cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
386+
387+
/**
388+
* @brief Asynchronously intersect another bloom filter into this.
389+
*
390+
* @note Modifies `this` in place.
391+
*
392+
* @note This performs the set intersection of the two filters. Unlike merge_async(), this
393+
* operation does not distribute over filter construction and therefore only approximates the
394+
* bloom filter of the intersection of the input sets. In other words, let \f$f : X \to B\f$
395+
* denote the construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be
396+
* two sets, then \f$f(A \cap B) \ne f(A) \cap f(B)\f$.
397+
*
398+
* @param other Other filter with matching type to this.
399+
* @param stream CUDA stream used for device memory operations and kernel launches.
400+
*
401+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
402+
*/
403+
__host__ constexpr void intersect_async(
404+
bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other,
405+
cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
406+
328407
/**
329408
* @brief Gets a pointer to the underlying filter storage.
330409
*
@@ -369,4 +448,4 @@ class bloom_filter {
369448
};
370449
} // namespace cuco
371450

372-
#include <cuco/detail/bloom_filter/bloom_filter.inl>
451+
#include <cuco/detail/bloom_filter/bloom_filter.inl>

include/cuco/bloom_filter_ref.cuh

Lines changed: 88 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class bloom_filter_ref {
9797
* @brief Erases all information from the filter.
9898
*
9999
* @note This function synchronizes the given stream. For asynchronous execution use
100-
* `clear_async`.
100+
* clear_async().
101101
*
102102
* @param stream CUDA stream used for device memory operations and kernel launches
103103
*/
@@ -114,7 +114,7 @@ class bloom_filter_ref {
114114
/**
115115
* @brief Device function that adds a key to the filter.
116116
*
117-
* @tparam ProbeKey Input type that is implicitly convertible to `key_type`
117+
* @tparam ProbeKey Input type that is implicitly convertible to @ref key_type
118118
*
119119
* @param key The key to be added
120120
*/
@@ -124,7 +124,7 @@ class bloom_filter_ref {
124124
/**
125125
* @brief Device function that cooperatively adds a key to the filter.
126126
*
127-
* @note Best performance is achieved if the size of the CG is equal to `words_per_block`.
127+
* @note Best performance is achieved if the size of the CG is equal to @ref words_per_block.
128128
*
129129
* @tparam CG Cooperative Group type
130130
* @tparam ProbeKey Input key type
@@ -139,7 +139,7 @@ class bloom_filter_ref {
139139
* @brief Device function that adds all keys in the range `[first, last)` to the filter.
140140
*
141141
* @note Best performance is achieved if the size of the CG is larger than or equal to
142-
* `words_per_block`.
142+
* @ref words_per_block.
143143
*
144144
* @tparam CG Cooperative Group type
145145
* @tparam InputIt Device-accessible random access input key iterator
@@ -155,7 +155,7 @@ class bloom_filter_ref {
155155
* @brief Adds all keys in the range `[first, last)` to the filter.
156156
*
157157
* @note This function synchronizes the given stream. For asynchronous execution use
158-
* `add_async`.
158+
* add_async().
159159
*
160160
* @tparam InputIt Device-accessible random access input key iterator
161161
*
@@ -187,7 +187,7 @@ class bloom_filter_ref {
187187
*
188188
* @note The key `*(first + i)` is added if `pred( *(stencil + i) )` returns `true`.
189189
* @note This function synchronizes the given stream and returns the number of successful
190-
* insertions. For asynchronous execution use `add_if_async`.
190+
* insertions. For asynchronous execution use add_if_async().
191191
*
192192
* @tparam InputIt Device-accessible random access input key iterator
193193
* @tparam StencilIt Device-accessible random-access iterator whose `value_type` is
@@ -275,7 +275,7 @@ class bloom_filter_ref {
275275
* filter.
276276
*
277277
* @note This function synchronizes the given stream. For asynchronous execution use
278-
* `contains_async`.
278+
* contains_async().
279279
*
280280
* @tparam InputIt Device-accessible random access input iterator where
281281
* <tt>std::is_convertible<std::iterator_traits<InputIt>::value_type,
@@ -321,7 +321,7 @@ class bloom_filter_ref {
321321
*
322322
* @note The key `*(first + i)` is queried if `pred( *(stencil + i) )` returns `true`.
323323
* @note This function synchronizes the given stream. For asynchronous execution use
324-
* `contains_if_async`.
324+
* contains_if_async().
325325
*
326326
* @tparam InputIt Device-accessible random access input iterator where
327327
* <tt>std::is_convertible<std::iterator_traits<InputIt>::value_type,
@@ -381,6 +381,85 @@ class bloom_filter_ref {
381381
cuda::stream_ref stream = cuda::stream_ref{
382382
cudaStream_t{nullptr}}) const noexcept;
383383

384+
/**
385+
* @brief Merge another bloom filter into this.
386+
*
387+
* @note Modifies `this` in place.
388+
* @note This function synchronizes the given stream. For asynchronous execution use
389+
* merge_async().
390+
*
391+
* @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
392+
* construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
393+
* then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$.
394+
*
395+
* @param other Other filter with matching type to this.
396+
* @param stream CUDA stream used for device memory operations and kernel launches.
397+
*
398+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
399+
*/
400+
__host__ constexpr void merge(bloom_filter_ref<Key, Extent, Scope, Policy> const& other,
401+
cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
402+
403+
/**
404+
* @brief Asynchronously merge another bloom filter into this.
405+
*
406+
* @note Modifies `this` in place.
407+
*
408+
* @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
409+
* construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
410+
* then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$.
411+
*
412+
* @param other Other filter with matching type to this.
413+
* @param stream CUDA stream used for device memory operations and kernel launches.
414+
*
415+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
416+
*/
417+
__host__ constexpr void merge_async(bloom_filter_ref<Key, Extent, Scope, Policy> const& other,
418+
cuda::stream_ref stream = cuda::stream_ref{
419+
cudaStream_t{nullptr}});
420+
421+
/**
422+
* @brief Intersect another bloom filter into this.
423+
*
424+
* @note Modifies `this` in place.
425+
* @note This function synchronizes the given stream. For asynchronous execution use
426+
* intersect_async().
427+
*
428+
* @note This performs the set intersection of the two filters. Unlike merge(), this operation
429+
* does not distribute over filter construction and therefore only approximates the bloom filter
430+
* of the intersection of the input sets. In other words, let \f$f : X \to B\f$ denote the
431+
* construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets,
432+
* then \f$f(A \cap B) \ne f(A) \cap f(B)\f$.
433+
*
434+
* @param other Other filter with matching type to this.
435+
* @param stream CUDA stream used for device memory operations and kernel launches.
436+
*
437+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
438+
*/
439+
__host__ constexpr void intersect(bloom_filter_ref<Key, Extent, Scope, Policy> const& other,
440+
cuda::stream_ref stream = cuda::stream_ref{
441+
cudaStream_t{nullptr}});
442+
443+
/**
444+
* @brief Asynchronously intersect another bloom filter into this.
445+
*
446+
* @note Modifies `this` in place.
447+
*
448+
* @note This performs the set intersection of the two filters. Unlike merge_async(), this
449+
* operation does not distribute over filter construction and therefore only approximates the
450+
* bloom filter of the intersection of the input sets. In other words, let \f$f : X \to B\f$
451+
* denote the construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be
452+
* two sets, then \f$f(A \cap B) \ne f(A) \cap f(B)\f$.
453+
*
454+
* @param other Other filter with matching type to this.
455+
* @param stream CUDA stream used for device memory operations and kernel launches.
456+
*
457+
* @throws cuco::logic_error If the other filter does not have the same number of blocks as this.
458+
*/
459+
__host__ constexpr void intersect_async(bloom_filter_ref<Key, Extent, Scope, Policy> const& other,
460+
cuda::stream_ref stream = cuda::stream_ref{
461+
cudaStream_t{nullptr}});
462+
384463
/**
385464
* @brief Gets a pointer to the underlying filter storage.
386465
*
@@ -407,4 +486,4 @@ class bloom_filter_ref {
407486
};
408487
} // namespace cuco
409488

410-
#include <cuco/detail/bloom_filter/bloom_filter_ref.inl>
489+
#include <cuco/detail/bloom_filter/bloom_filter_ref.inl>

include/cuco/detail/bloom_filter/bloom_filter.inl

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,34 @@ __host__ constexpr void bloom_filter<Key, Extent, Scope, Policy, Allocator>::con
129129
ref_.contains_if_async(first, last, stencil, pred, output_begin, stream);
130130
}
131131

132+
template <class Key, class Extent, cuda::thread_scope Scope, class Policy, class Allocator>
133+
__host__ constexpr void bloom_filter<Key, Extent, Scope, Policy, Allocator>::merge(
134+
bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other, cuda::stream_ref stream)
135+
{
136+
ref_.merge(other.ref_, stream);
137+
}
138+
139+
template <class Key, class Extent, cuda::thread_scope Scope, class Policy, class Allocator>
140+
__host__ constexpr void bloom_filter<Key, Extent, Scope, Policy, Allocator>::merge_async(
141+
bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other, cuda::stream_ref stream)
142+
{
143+
ref_.merge_async(other.ref_, stream);
144+
}
145+
146+
template <class Key, class Extent, cuda::thread_scope Scope, class Policy, class Allocator>
147+
__host__ constexpr void bloom_filter<Key, Extent, Scope, Policy, Allocator>::intersect(
148+
bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other, cuda::stream_ref stream)
149+
{
150+
ref_.intersect(other.ref_, stream);
151+
}
152+
153+
template <class Key, class Extent, cuda::thread_scope Scope, class Policy, class Allocator>
154+
__host__ constexpr void bloom_filter<Key, Extent, Scope, Policy, Allocator>::intersect_async(
155+
bloom_filter<Key, Extent, Scope, Policy, Allocator> const& other, cuda::stream_ref stream)
156+
{
157+
ref_.intersect_async(other.ref_, stream);
158+
}
159+
132160
template <class Key, class Extent, cuda::thread_scope Scope, class Policy, class Allocator>
133161
[[nodiscard]] __host__ constexpr
134162
typename bloom_filter<Key, Extent, Scope, Policy, Allocator>::word_type*
@@ -169,4 +197,4 @@ template <class Key, class Extent, cuda::thread_scope Scope, class Policy, class
169197
return ref_;
170198
}
171199

172-
} // namespace cuco
200+
} // namespace cuco

0 commit comments

Comments
 (0)