@@ -97,7 +97,7 @@ class bloom_filter_ref {
9797 * @brief Erases all information from the filter.
9898 *
9999 * @note This function synchronizes the given stream. For asynchronous execution use
100- * ` clear_async` .
100+ * clear_async().
101101 *
102102 * @param stream CUDA stream used for device memory operations and kernel launches
103103 */
@@ -114,7 +114,7 @@ class bloom_filter_ref {
114114 /* *
115115 * @brief Device function that adds a key to the filter.
116116 *
117- * @tparam ProbeKey Input type that is implicitly convertible to ` key_type`
117+ * @tparam ProbeKey Input type that is implicitly convertible to @ref key_type
118118 *
119119 * @param key The key to be added
120120 */
@@ -124,7 +124,7 @@ class bloom_filter_ref {
124124 /* *
125125 * @brief Device function that cooperatively adds a key to the filter.
126126 *
127- * @note Best performance is achieved if the size of the CG is equal to ` words_per_block` .
127+ * @note Best performance is achieved if the size of the CG is equal to @ref words_per_block.
128128 *
129129 * @tparam CG Cooperative Group type
130130 * @tparam ProbeKey Input key type
@@ -139,7 +139,7 @@ class bloom_filter_ref {
139139 * @brief Device function that adds all keys in the range `[first, last)` to the filter.
140140 *
141141 * @note Best performance is achieved if the size of the CG is larger than or equal to
142- * ` words_per_block` .
142+ * @ref words_per_block.
143143 *
144144 * @tparam CG Cooperative Group type
145145 * @tparam InputIt Device-accessible random access input key iterator
@@ -155,7 +155,7 @@ class bloom_filter_ref {
155155 * @brief Adds all keys in the range `[first, last)` to the filter.
156156 *
157157 * @note This function synchronizes the given stream. For asynchronous execution use
158- * ` add_async` .
158+ * add_async().
159159 *
160160 * @tparam InputIt Device-accessible random access input key iterator
161161 *
@@ -187,7 +187,7 @@ class bloom_filter_ref {
187187 *
188188 * @note The key `*(first + i)` is added if `pred( *(stencil + i) )` returns `true`.
189189 * @note This function synchronizes the given stream and returns the number of successful
190- * insertions. For asynchronous execution use ` add_if_async` .
190+ * insertions. For asynchronous execution use add_if_async().
191191 *
192192 * @tparam InputIt Device-accessible random access input key iterator
193193 * @tparam StencilIt Device-accessible random-access iterator whose `value_type` is
@@ -275,7 +275,7 @@ class bloom_filter_ref {
275275 * filter.
276276 *
277277 * @note This function synchronizes the given stream. For asynchronous execution use
278- * ` contains_async` .
278+ * contains_async().
279279 *
280280 * @tparam InputIt Device-accessible random access input iterator where
281281 * <tt>std::is_convertible<std::iterator_traits<InputIt>::value_type,
@@ -321,7 +321,7 @@ class bloom_filter_ref {
321321 *
322322 * @note The key `*(first + i)` is queried if `pred( *(stencil + i) )` returns `true`.
323323 * @note This function synchronizes the given stream. For asynchronous execution use
324- * ` contains_if_async` .
324+ * contains_if_async().
325325 *
326326 * @tparam InputIt Device-accessible random access input iterator where
327327 * <tt>std::is_convertible<std::iterator_traits<InputIt>::value_type,
@@ -381,6 +381,85 @@ class bloom_filter_ref {
381381 cuda::stream_ref stream = cuda::stream_ref{
382382 cudaStream_t{nullptr }}) const noexcept ;
383383
384+ /* *
385+ * @brief Merges another bloom filter into this filter.
386+ *
387+ * @note Modifies `this` in place.
388+ * @note This function synchronizes the given stream. For asynchronous execution use
389+ * merge_async().
390+ *
391+ * @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
392+ * construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets.
393+ * Then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$.
394+ *
395+ * @param other Filter to merge into this one; its type must match the type of this filter.
396+ * @param stream CUDA stream used for device memory operations and kernel launches.
397+ *
398+ * @throws cuco::logic_error If `other` does not have the same number of blocks as this filter.
399+ */
400+ __host__ constexpr void merge (bloom_filter_ref<Key, Extent, Scope, Policy> const & other,
401+ cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr }});
402+
403+ /* *
404+ * @brief Asynchronously merges another bloom filter into this filter.
405+ *
406+ * @note Modifies `this` in place.
407+ *
408+ * @note This performs the set union of the two filters. Let \f$f : X \to B\f$ denote the
409+ * construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets.
410+ * Then it holds that \f$f(A \cup B) = f(A) \cup f(B)\f$.
411+ *
412+ * @param other Filter to merge into this one; its type must match the type of this filter.
413+ * @param stream CUDA stream used for device memory operations and kernel launches.
414+ *
415+ * @throws cuco::logic_error If `other` does not have the same number of blocks as this filter.
416+ */
417+ __host__ constexpr void merge_async (bloom_filter_ref<Key, Extent, Scope, Policy> const & other,
418+ cuda::stream_ref stream = cuda::stream_ref{
419+ cudaStream_t{nullptr }});
420+
421+ /* *
422+ * @brief Intersects this filter with another bloom filter.
423+ *
424+ * @note Modifies `this` in place.
425+ * @note This function synchronizes the given stream. For asynchronous execution use
426+ * intersect_async().
427+ *
428+ * @note This performs the set intersection of the two filters. Unlike merge(), this operation
429+ * does not distribute over filter construction and therefore only approximates the bloom filter
430+ * of the intersection of the input sets. In other words, let \f$f : X \to B\f$ denote the
431+ * construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be two sets.
432+ * Then, in general, \f$f(A \cap B) \ne f(A) \cap f(B)\f$.
433+ *
434+ * @param other Filter to intersect with; its type must match the type of this filter.
435+ * @param stream CUDA stream used for device memory operations and kernel launches.
436+ *
437+ * @throws cuco::logic_error If `other` does not have the same number of blocks as this filter.
438+ */
439+ __host__ constexpr void intersect (bloom_filter_ref<Key, Extent, Scope, Policy> const & other,
440+ cuda::stream_ref stream = cuda::stream_ref{
441+ cudaStream_t{nullptr }});
442+
443+ /* *
444+ * @brief Asynchronously intersects this filter with another bloom filter.
445+ *
446+ * @note Modifies `this` in place.
447+ *
448+ * @note This performs the set intersection of the two filters. Unlike merge_async(), this
449+ * operation does not distribute over filter construction and therefore only approximates the
450+ * bloom filter of the intersection of the input sets. In other words, let \f$f : X \to B\f$
451+ * denote the construction of a bloom filter on some set \f$X\f$, and let \f$A\f$ and \f$B\f$ be
452+ * two sets. Then, in general, \f$f(A \cap B) \ne f(A) \cap f(B)\f$.
453+ *
454+ * @param other Filter to intersect with; its type must match the type of this filter.
455+ * @param stream CUDA stream used for device memory operations and kernel launches.
456+ *
457+ * @throws cuco::logic_error If `other` does not have the same number of blocks as this filter.
458+ */
459+ __host__ constexpr void intersect_async (bloom_filter_ref<Key, Extent, Scope, Policy> const & other,
460+ cuda::stream_ref stream = cuda::stream_ref{
461+ cudaStream_t{nullptr }});
462+
384463 /* *
385464 * @brief Gets a pointer to the underlying filter storage.
386465 *
@@ -407,4 +486,4 @@ class bloom_filter_ref {
407486};
408487} // namespace cuco
409488
410- #include < cuco/detail/bloom_filter/bloom_filter_ref.inl>
489+ #include < cuco/detail/bloom_filter/bloom_filter_ref.inl>
0 commit comments