Expose HLL ctor and sketch_bytes overloads taking cuco::precision

PointKernel · PointKernel · commit 0bd37a600a27 · 2025-12-18T09:43:17.000-08:00
diff --git a/include/cuco/detail/hyperloglog/hyperloglog.inl b/include/cuco/detail/hyperloglog/hyperloglog.inl
@@ -49,6 +49,22 @@ constexpr hyperloglog<T, Scope, Hash, Allocator>::hyperloglog(
   this->clear_async(stream);
 }
 
+template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
+constexpr hyperloglog<T, Scope, Hash, Allocator>::hyperloglog(cuco::precision precision,
+                                                              Hash const& hash,
+                                                              Allocator const& alloc,
+                                                              cuda::stream_ref stream)
+  : allocator_{alloc},  // sketch_ below allocates through this member and hands it to the deleter
+    sketch_{  // element count = sketch_bytes(precision) / sizeof(register_type)
+      allocator_.allocate(sketch_bytes(precision) / sizeof(register_type), stream),
+      detail::custom_deleter{sketch_bytes(precision) / sizeof(register_type), allocator_, stream}},
+    ref_{  // byte view over the same storage; its extent is the full sketch_bytes(precision)
+      cuda::std::span{reinterpret_cast<cuda::std::byte*>(sketch_.get()), sketch_bytes(precision)},
+      hash}
+{
+  this->clear_async(stream);  // issued on `stream`; presumably resets the sketch (see clear_async)
+}
+
 template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
 constexpr void hyperloglog<T, Scope, Hash, Allocator>::clear_async(cuda::stream_ref stream) noexcept
 {
@@ -166,6 +182,13 @@ constexpr size_t hyperloglog<T, Scope, Hash, Allocator>::sketch_bytes(
   return ref_type<>::sketch_bytes(standard_deviation);
 }
 
+template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
+constexpr size_t hyperloglog<T, Scope, Hash, Allocator>::sketch_bytes(
+  cuco::precision precision) noexcept
+{
+  return ref_type<>::sketch_bytes(precision);  // delegate to the ref type's static overload
+}
+
 template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
 constexpr size_t hyperloglog<T, Scope, Hash, Allocator>::sketch_alignment() noexcept
 {
diff --git a/include/cuco/detail/hyperloglog/hyperloglog_impl.cuh b/include/cuco/detail/hyperloglog/hyperloglog_impl.cuh
@@ -519,12 +519,13 @@ class hyperloglog_impl {
    *
    * @return The number of bytes required for the sketch
    */
-  [[nodiscard]] __host__ __device__ static constexpr size_t sketch_bytes(
+  [[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_bytes(
     cuco::sketch_size_kb sketch_size_kb) noexcept
   {
     // minimum precision is 4 or 64 bytes
-    return cuda::std::max(static_cast<size_t>(sizeof(register_type) * 1ull << 4),
-                          cuda::std::bit_floor(static_cast<size_t>(sketch_size_kb * 1024)));
+    return cuda::std::max(
+      static_cast<cuda::std::size_t>(sizeof(register_type) * 1ull << 4),
+      cuda::std::bit_floor(static_cast<cuda::std::size_t>(sketch_size_kb * 1024)));
   }
 
   /**
@@ -534,16 +535,16 @@ class hyperloglog_impl {
    *
    * @return The number of bytes required for the sketch
    */
-  [[nodiscard]] __host__ __device__ static constexpr std::size_t sketch_bytes(
+  [[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_bytes(
     cuco::standard_deviation standard_deviation) noexcept
   {
     // implementation taken from
     // https://github.com/apache/spark/blob/6a27789ad7d59cd133653a49be0bb49729542abe/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala#L43
 
     //  minimum precision is 4 or 64 bytes
     auto const precision = cuda::std::max(
-      static_cast<int32_t>(4),
-      static_cast<int32_t>(
+      static_cast<cuda::std::int32_t>(4),
+      static_cast<cuda::std::int32_t>(
         cuda::std::ceil(2.0 * cuda::std::log(1.106 / standard_deviation) / cuda::std::log(2.0))));
 
     // inverse of this function (ommitting the minimum precision constraint) is
@@ -552,14 +553,30 @@ class hyperloglog_impl {
     return sizeof(register_type) * (1ull << precision);
   }
 
+  /**
+   * @brief Gets the number of bytes required for the sketch storage.
+   *
+   * @param precision HyperLogLog precision parameter; values below 4 are raised to 4
+   *
+   * @return `sizeof(register_type) * 2^precision` bytes, computed from the clamped precision
+   */
+  [[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_bytes(
+    cuco::precision precision) noexcept
+  {
+    // minimum precision is 4 or 64 bytes
+    auto const clamped_precision =  // lower clamp only; NOTE(review): shift below is UB for >= 64
+      cuda::std::max(cuda::std::int32_t{4}, cuda::std::int32_t{precision});
+    return cuda::std::size_t{sizeof(register_type) * (1ull << clamped_precision)};
+  }
+
   /**
    * @brief Gets the alignment required for the sketch storage.
    *
    * @return The required alignment
    */
-  [[nodiscard]] __host__ __device__ static constexpr size_t sketch_alignment() noexcept
+  [[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_alignment() noexcept
   {
-    return alignof(register_type);
+    return cuda::std::size_t{alignof(register_type)};
   }
 
  private:
diff --git a/include/cuco/detail/hyperloglog/hyperloglog_ref.inl b/include/cuco/detail/hyperloglog/hyperloglog_ref.inl
@@ -147,6 +147,13 @@ __host__ __device__ constexpr std::size_t hyperloglog_ref<T, Scope, Hash>::sketc
   return impl_type::sketch_bytes(standard_deviation);
 }
 
+template <class T, cuda::thread_scope Scope, class Hash>
+__host__ __device__ constexpr std::size_t hyperloglog_ref<T, Scope, Hash>::sketch_bytes(
+  cuco::precision precision) noexcept
+{
+  return impl_type::sketch_bytes(precision);  // the size computation itself lives in impl_type
+}
+
 template <class T, cuda::thread_scope Scope, class Hash>
 __host__ __device__ constexpr std::size_t
 hyperloglog_ref<T, Scope, Hash>::sketch_alignment() noexcept
diff --git a/include/cuco/hyperloglog.cuh b/include/cuco/hyperloglog.cuh
@@ -90,6 +90,22 @@ class hyperloglog {
                         Allocator const& alloc  = {},
                         cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
 
+  /**
+   * @brief Constructs a `hyperloglog` host object.
+   *
+   * @note This function synchronizes the given stream.
+   *
+   * @param precision HyperLogLog precision parameter; the sketch holds 2^precision registers
+   * (values below 4 are treated as 4)
+   * @param hash The hash function used to hash items
+   * @param alloc Allocator used for allocating device storage
+   * @param stream CUDA stream used to initialize the object
+   */
+  constexpr hyperloglog(cuco::precision precision,
+                        Hash const& hash        = {},
+                        Allocator const& alloc  = {},
+                        cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
+
   ~hyperloglog() = default;
 
   hyperloglog(hyperloglog const&)            = delete;
@@ -308,6 +324,15 @@ class hyperloglog {
   [[nodiscard]] static constexpr std::size_t sketch_bytes(
     cuco::standard_deviation standard_deviation) noexcept;
 
+  /**
+   * @brief Gets the number of bytes required for the sketch storage.
+   *
+   * @param precision HyperLogLog precision parameter (values below 4 are treated as 4)
+   *
+   * @return The number of bytes required for the sketch
+   */
+  [[nodiscard]] static constexpr std::size_t sketch_bytes(cuco::precision precision) noexcept;
+
   /**
    * @brief Gets the alignment required for the sketch storage.
    *
diff --git a/include/cuco/hyperloglog_ref.cuh b/include/cuco/hyperloglog_ref.cuh
@@ -275,6 +275,16 @@ class hyperloglog_ref {
   [[nodiscard]] __host__ __device__ static constexpr std::size_t sketch_bytes(
     cuco::standard_deviation standard_deviation) noexcept;
 
+  /**
+   * @brief Gets the number of bytes required for the sketch storage.
+   *
+   * @param precision HyperLogLog precision parameter (values below 4 are treated as 4)
+   *
+   * @return The number of bytes required for the sketch
+   */
+  [[nodiscard]] __host__ __device__ static constexpr std::size_t sketch_bytes(
+    cuco::precision precision) noexcept;
+
   /**
    * @brief Gets the alignment required for the sketch storage.
    *
diff --git a/include/cuco/types.cuh b/include/cuco/types.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -58,6 +58,15 @@ CUCO_DEFINE_STRONG_TYPE(sketch_size_kb, double);
  */
 CUCO_DEFINE_STRONG_TYPE(standard_deviation, double);
 
+/**
+ * @brief A strong type wrapper `cuco::precision` for specifying the HyperLogLog precision
+ * parameter of `cuco::hyperloglog(_ref)`.
+ *
+ * @note Precision `p` determines the number of registers as `2^p`. Valid range is typically
+ * [4, 18]; `sketch_bytes` raises smaller values to 4.
+ */
+CUCO_DEFINE_STRONG_TYPE(precision, int32_t);
+
 }  // namespace cuco
 
 // User-defined literal operators for `cuco::sketch_size_KB`