Skip to content

Commit 0bd37a6

Browse files
committed
Expose HLL ctor overload taking precision
1 parent 711b442 commit 0bd37a6

File tree

6 files changed

+100
-9
lines changed

6 files changed

+100
-9
lines changed

include/cuco/detail/hyperloglog/hyperloglog.inl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,22 @@ constexpr hyperloglog<T, Scope, Hash, Allocator>::hyperloglog(
4949
this->clear_async(stream);
5050
}
5151

52+
template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
53+
constexpr hyperloglog<T, Scope, Hash, Allocator>::hyperloglog(cuco::precision precision,
54+
Hash const& hash,
55+
Allocator const& alloc,
56+
cuda::stream_ref stream)
57+
: allocator_{alloc},
58+
sketch_{
59+
allocator_.allocate(sketch_bytes(precision) / sizeof(register_type), stream),
60+
detail::custom_deleter{sketch_bytes(precision) / sizeof(register_type), allocator_, stream}},
61+
ref_{
62+
cuda::std::span{reinterpret_cast<cuda::std::byte*>(sketch_.get()), sketch_bytes(precision)},
63+
hash}
64+
{
65+
this->clear_async(stream);
66+
}
67+
5268
template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
5369
constexpr void hyperloglog<T, Scope, Hash, Allocator>::clear_async(cuda::stream_ref stream) noexcept
5470
{
@@ -166,6 +182,13 @@ constexpr size_t hyperloglog<T, Scope, Hash, Allocator>::sketch_bytes(
166182
return ref_type<>::sketch_bytes(standard_deviation);
167183
}
168184

185+
template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
186+
constexpr size_t hyperloglog<T, Scope, Hash, Allocator>::sketch_bytes(
187+
cuco::precision precision) noexcept
188+
{
189+
return ref_type<>::sketch_bytes(precision);
190+
}
191+
169192
template <class T, cuda::thread_scope Scope, class Hash, class Allocator>
170193
constexpr size_t hyperloglog<T, Scope, Hash, Allocator>::sketch_alignment() noexcept
171194
{

include/cuco/detail/hyperloglog/hyperloglog_impl.cuh

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -519,12 +519,13 @@ class hyperloglog_impl {
519519
*
520520
* @return The number of bytes required for the sketch
521521
*/
522-
[[nodiscard]] __host__ __device__ static constexpr size_t sketch_bytes(
522+
[[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_bytes(
523523
cuco::sketch_size_kb sketch_size_kb) noexcept
524524
{
525525
// minimum precision is 4 or 64 bytes
526-
return cuda::std::max(static_cast<size_t>(sizeof(register_type) * 1ull << 4),
527-
cuda::std::bit_floor(static_cast<size_t>(sketch_size_kb * 1024)));
526+
return cuda::std::max(
527+
static_cast<cuda::std::size_t>(sizeof(register_type) * 1ull << 4),
528+
cuda::std::bit_floor(static_cast<cuda::std::size_t>(sketch_size_kb * 1024)));
528529
}
529530

530531
/**
@@ -534,16 +535,16 @@ class hyperloglog_impl {
534535
*
535536
* @return The number of bytes required for the sketch
536537
*/
537-
[[nodiscard]] __host__ __device__ static constexpr std::size_t sketch_bytes(
538+
[[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_bytes(
538539
cuco::standard_deviation standard_deviation) noexcept
539540
{
540541
// implementation taken from
541542
// https://github.com/apache/spark/blob/6a27789ad7d59cd133653a49be0bb49729542abe/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala#L43
542543

543544
// minimum precision is 4 or 64 bytes
544545
auto const precision = cuda::std::max(
545-
static_cast<int32_t>(4),
546-
static_cast<int32_t>(
546+
static_cast<cuda::std::int32_t>(4),
547+
static_cast<cuda::std::int32_t>(
547548
cuda::std::ceil(2.0 * cuda::std::log(1.106 / standard_deviation) / cuda::std::log(2.0))));
548549

549550
// inverse of this function (omitting the minimum precision constraint) is
@@ -552,14 +553,30 @@ class hyperloglog_impl {
552553
return sizeof(register_type) * (1ull << precision);
553554
}
554555

556+
/**
557+
* @brief Gets the number of bytes required for the sketch storage.
558+
*
559+
* @param precision HyperLogLog precision parameter
560+
*
561+
* @return The number of bytes required for the sketch
562+
*/
563+
[[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_bytes(
564+
cuco::precision precision) noexcept
565+
{
566+
// minimum precision is 4 or 64 bytes
567+
auto const clamped_precision =
568+
cuda::std::max(cuda::std::int32_t{4}, cuda::std::int32_t{precision});
569+
return cuda::std::size_t{sizeof(register_type) * (1ull << clamped_precision)};
570+
}
571+
555572
/**
556573
* @brief Gets the alignment required for the sketch storage.
557574
*
558575
* @return The required alignment
559576
*/
560-
[[nodiscard]] __host__ __device__ static constexpr size_t sketch_alignment() noexcept
577+
[[nodiscard]] __host__ __device__ static constexpr cuda::std::size_t sketch_alignment() noexcept
561578
{
562-
return alignof(register_type);
579+
return cuda::std::size_t{alignof(register_type)};
563580
}
564581

565582
private:

include/cuco/detail/hyperloglog/hyperloglog_ref.inl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ __host__ __device__ constexpr std::size_t hyperloglog_ref<T, Scope, Hash>::sketc
147147
return impl_type::sketch_bytes(standard_deviation);
148148
}
149149

150+
template <class T, cuda::thread_scope Scope, class Hash>
151+
__host__ __device__ constexpr std::size_t hyperloglog_ref<T, Scope, Hash>::sketch_bytes(
152+
cuco::precision precision) noexcept
153+
{
154+
return impl_type::sketch_bytes(precision);
155+
}
156+
150157
template <class T, cuda::thread_scope Scope, class Hash>
151158
__host__ __device__ constexpr std::size_t
152159
hyperloglog_ref<T, Scope, Hash>::sketch_alignment() noexcept

include/cuco/hyperloglog.cuh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,22 @@ class hyperloglog {
9090
Allocator const& alloc = {},
9191
cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
9292

93+
/**
94+
* @brief Constructs a `hyperloglog` host object.
95+
*
96+
* @note This function synchronizes the given stream.
97+
*
98+
* @param precision HyperLogLog precision parameter (determines number of registers as
99+
* 2^precision)
100+
* @param hash The hash function used to hash items
101+
* @param alloc Allocator used for allocating device storage
102+
* @param stream CUDA stream used to initialize the object
103+
*/
104+
constexpr hyperloglog(cuco::precision precision,
105+
Hash const& hash = {},
106+
Allocator const& alloc = {},
107+
cuda::stream_ref stream = cuda::stream_ref{cudaStream_t{nullptr}});
108+
93109
~hyperloglog() = default;
94110

95111
hyperloglog(hyperloglog const&) = delete;
@@ -308,6 +324,15 @@ class hyperloglog {
308324
[[nodiscard]] static constexpr std::size_t sketch_bytes(
309325
cuco::standard_deviation standard_deviation) noexcept;
310326

327+
/**
328+
* @brief Gets the number of bytes required for the sketch storage.
329+
*
330+
* @param precision HyperLogLog precision parameter
331+
*
332+
* @return The number of bytes required for the sketch
333+
*/
334+
[[nodiscard]] static constexpr std::size_t sketch_bytes(cuco::precision precision) noexcept;
335+
311336
/**
312337
* @brief Gets the alignment required for the sketch storage.
313338
*

include/cuco/hyperloglog_ref.cuh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,16 @@ class hyperloglog_ref {
275275
[[nodiscard]] __host__ __device__ static constexpr std::size_t sketch_bytes(
276276
cuco::standard_deviation standard_deviation) noexcept;
277277

278+
/**
279+
* @brief Gets the number of bytes required for the sketch storage.
280+
*
281+
* @param precision HyperLogLog precision parameter
282+
*
283+
* @return The number of bytes required for the sketch
284+
*/
285+
[[nodiscard]] __host__ __device__ static constexpr std::size_t sketch_bytes(
286+
cuco::precision precision) noexcept;
287+
278288
/**
279289
* @brief Gets the alignment required for the sketch storage.
280290
*

include/cuco/types.cuh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -58,6 +58,15 @@ CUCO_DEFINE_STRONG_TYPE(sketch_size_kb, double);
5858
*/
5959
CUCO_DEFINE_STRONG_TYPE(standard_deviation, double);
6060

61+
/**
62+
* @brief A strong type wrapper `cuco::precision` for specifying the HyperLogLog precision
63+
* parameter of `cuco::hyperloglog(_ref)`.
64+
*
65+
* @note Precision `p` determines the number of registers as `2^p`. Valid range is typically [4,
66+
* 18].
67+
*/
68+
CUCO_DEFINE_STRONG_TYPE(precision, int32_t);
69+
6170
} // namespace cuco
6271

6372
// User-defined literal operators for `cuco::sketch_size_kb`

0 commit comments

Comments
 (0)