Skip to content

Commit 4e68e8a

Browse files
committed
Code simplifications
1 parent 41f2939 commit 4e68e8a

File tree

2 files changed

+38
-42
lines changed

2 files changed

+38
-42
lines changed

include/cuco/detail/roaring_bitmap/roaring_bitmap_impl.cuh

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,12 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
113113
// linear search
114114
#pragma unroll
115115
for (cuda::std::uint32_t i = 0; i < storage_ref_.metadata().num_containers; i++) {
116+
cuda::std::byte const* key_ptr =
117+
storage_ref_.key_cards() + (i * 2) * sizeof(cuda::std::uint16_t);
116118
if constexpr (Aligned) {
117-
key = aligned_load<cuda::std::uint16_t>(storage_ref_.key_cards() +
118-
(i * 2) * sizeof(cuda::std::uint16_t));
119+
key = aligned_load<cuda::std::uint16_t>(key_ptr);
119120
} else {
120-
key = misaligned_load<cuda::std::uint16_t>(storage_ref_.key_cards() +
121-
(i * 2) * sizeof(cuda::std::uint16_t));
121+
key = misaligned_load<cuda::std::uint16_t>(key_ptr);
122122
}
123123
if (key == upper) { return this->contains_container<Aligned>(lower, i); }
124124
if (key > upper) { return false; }
@@ -129,12 +129,12 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
129129
cuda::std::uint32_t right = storage_ref_.metadata().num_containers;
130130
while (left < right) {
131131
cuda::std::uint32_t mid = left + (right - left) / 2;
132+
cuda::std::byte const* key_ptr =
133+
storage_ref_.key_cards() + (mid * 2) * sizeof(cuda::std::uint16_t);
132134
if constexpr (Aligned) {
133-
key = aligned_load<cuda::std::uint16_t>(storage_ref_.key_cards() +
134-
(mid * 2) * sizeof(cuda::std::uint16_t));
135+
key = aligned_load<cuda::std::uint16_t>(key_ptr);
135136
} else {
136-
key = misaligned_load<cuda::std::uint16_t>(storage_ref_.key_cards() +
137-
(mid * 2) * sizeof(cuda::std::uint16_t));
137+
key = misaligned_load<cuda::std::uint16_t>(key_ptr);
138138
}
139139

140140
if (key == upper) {
@@ -170,29 +170,29 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
170170
__device__ bool contains_container(cuda::std::uint16_t lower, cuda::std::uint32_t index) const
171171
{
172172
cuda::std::uint32_t offset;
173+
cuda::std::byte const* offset_ptr =
174+
storage_ref_.container_offsets() + index * sizeof(cuda::std::uint32_t);
173175
if (offsets_aligned_) {
174-
offset = aligned_load<cuda::std::uint32_t>(storage_ref_.container_offsets() +
175-
index * sizeof(cuda::std::uint32_t));
176+
offset = aligned_load<cuda::std::uint32_t>(offset_ptr);
176177
} else {
177-
offset = misaligned_load<cuda::std::uint32_t>(storage_ref_.container_offsets() +
178-
index * sizeof(cuda::std::uint32_t));
178+
offset = misaligned_load<cuda::std::uint32_t>(offset_ptr);
179179
}
180180
cuda::std::byte const* container = storage_ref_.data() + offset;
181181
if (storage_ref_.metadata().has_run and check_bit(storage_ref_.run_container_bitmap(), index)) {
182182
return this->contains_run_container<Aligned>(container, lower);
183183
} else {
184184
cuda::std::uint32_t card;
185+
cuda::std::byte const* card_ptr =
186+
storage_ref_.key_cards() + (index * 2 + 1) * sizeof(cuda::std::uint16_t);
185187
if constexpr (Aligned) {
186-
card = 1u + aligned_load<cuda::std::uint16_t>(
187-
storage_ref_.key_cards() + (index * 2 + 1) * sizeof(cuda::std::uint16_t));
188+
card = 1u + aligned_load<cuda::std::uint16_t>(card_ptr);
188189
} else {
189-
card = 1u + misaligned_load<cuda::std::uint16_t>(
190-
storage_ref_.key_cards() + (index * 2 + 1) * sizeof(cuda::std::uint16_t));
190+
card = 1u + misaligned_load<cuda::std::uint16_t>(card_ptr);
191191
}
192192
if (card <= storage_ref_type::metadata_type::max_array_container_card) {
193193
return this->contains_array_container<Aligned>(container, lower, card);
194194
} else {
195-
return this->contains_bitset_container(container, lower, card);
195+
return this->contains_bitset_container(container, lower);
196196
}
197197
}
198198
}
@@ -206,10 +206,11 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
206206
// Use linear search for small arrays, binary search for larger ones
207207
if (card < binary_search_threshold) {
208208
for (cuda::std::uint32_t i = 0; i < card; i++) {
209+
cuda::std::byte const* elem_ptr = container + i * sizeof(cuda::std::uint16_t);
209210
if constexpr (Aligned) {
210-
elem = aligned_load<cuda::std::uint16_t>(container + i * sizeof(cuda::std::uint16_t));
211+
elem = aligned_load<cuda::std::uint16_t>(elem_ptr);
211212
} else {
212-
elem = misaligned_load<cuda::std::uint16_t>(container + i * sizeof(cuda::std::uint16_t));
213+
elem = misaligned_load<cuda::std::uint16_t>(elem_ptr);
213214
}
214215
if (elem == lower) { return true; }
215216
}
@@ -219,12 +220,12 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
219220
cuda::std::uint32_t right = card;
220221

221222
while (left < right) {
222-
cuda::std::uint32_t mid = left + (right - left) / 2;
223+
cuda::std::uint32_t mid = left + (right - left) / 2;
224+
cuda::std::byte const* elem_ptr = container + mid * sizeof(cuda::std::uint16_t);
223225
if constexpr (Aligned) {
224-
elem = aligned_load<cuda::std::uint16_t>(container + mid * sizeof(cuda::std::uint16_t));
226+
elem = aligned_load<cuda::std::uint16_t>(elem_ptr);
225227
} else {
226-
elem =
227-
misaligned_load<cuda::std::uint16_t>(container + mid * sizeof(cuda::std::uint16_t));
228+
elem = misaligned_load<cuda::std::uint16_t>(elem_ptr);
228229
}
229230
if (elem == lower) {
230231
return true;
@@ -239,11 +240,9 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
239240
}
240241

241242
__device__ bool contains_bitset_container(cuda::std::byte const* container,
242-
cuda::std::uint16_t lower,
243-
cuda::std::uint32_t card) const
243+
cuda::std::uint16_t lower) const
244244
{
245-
return static_cast<cuda::std::uint8_t>(container[lower / 8]) &
246-
(cuda::std::uint8_t(1) << (lower % 8));
245+
return check_bit(container, lower);
247246
}
248247

249248
template <bool Aligned>
@@ -262,19 +261,18 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
262261
cuda::std::uint32_t end;
263262

264263
for (cuda::std::uint32_t i = 0; i < num_runs; i++) {
264+
// the first 16 bits of the run container denotes the number of runs
265+
// followed by the sequence of runs as (start, end) U16 pairs
266+
cuda::std::byte const* start_ptr = container + (i * 2 + 1) * sizeof(cuda::std::uint16_t);
265267
// TODO load start+end in one instruction
266268
if constexpr (Aligned) {
267-
start =
268-
aligned_load<cuda::std::uint16_t>(container + (i * 2 + 1) * sizeof(cuda::std::uint16_t));
269-
end =
270-
static_cast<cuda::std::uint32_t>(start) +
271-
aligned_load<cuda::std::uint16_t>(container + (i * 2 + 2) * sizeof(cuda::std::uint16_t));
269+
start = aligned_load<cuda::std::uint16_t>(start_ptr);
270+
end = static_cast<cuda::std::uint32_t>(start) +
271+
aligned_load<cuda::std::uint16_t>(start_ptr + sizeof(cuda::std::uint16_t));
272272
} else {
273-
start = misaligned_load<cuda::std::uint16_t>(container +
274-
(i * 2 + 1) * sizeof(cuda::std::uint16_t));
273+
start = misaligned_load<cuda::std::uint16_t>(start_ptr);
275274
end = static_cast<cuda::std::uint32_t>(start) +
276-
misaligned_load<cuda::std::uint16_t>(container +
277-
(i * 2 + 2) * sizeof(cuda::std::uint16_t));
275+
misaligned_load<cuda::std::uint16_t>(start_ptr + sizeof(cuda::std::uint16_t));
278276
}
279277
if (start <= lower && end >= lower) { return true; }
280278
if (start > lower) { break; }

include/cuco/detail/roaring_bitmap/util.cuh

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -138,14 +138,12 @@ struct roaring_bitmap_metadata<cuda::std::uint32_t> {
138138

139139
cuda::std::uint32_t card = 0;
140140
for (cuda::std::int32_t i = 0; i < num_containers; i++) {
141+
cuda::std::byte const* card_ptr =
142+
bitmap + key_cards + (i * 2 + 1) * sizeof(cuda::std::uint16_t);
141143
if (aligned_16) {
142-
card = aligned_load<cuda::std::uint16_t>(bitmap + key_cards +
143-
(i * 2 + 1) * sizeof(cuda::std::uint16_t)) +
144-
1u;
144+
card = 1u + aligned_load<cuda::std::uint16_t>(card_ptr);
145145
} else {
146-
card = misaligned_load<cuda::std::uint16_t>(bitmap + key_cards +
147-
(i * 2 + 1) * sizeof(cuda::std::uint16_t)) +
148-
1u;
146+
card = 1u + misaligned_load<cuda::std::uint16_t>(card_ptr);
149147
}
150148
num_keys += card;
151149
}

0 commit comments

Comments
 (0)