Skip to content

Commit a7a2c43

Browse files
committed
Minor cleanups
1 parent 174940e commit a7a2c43

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1220,6 +1220,7 @@ class open_addressing_ref_impl {
12201220
}
12211221

12221222
// Fill the buffer if any matching keys are found
1223+
auto const lane_id = probing_tile.thread_rank();
12231224
if (thrust::any_of(thrust::seq, exists, exists + bucket_size, thrust::identity{})) {
12241225
if constexpr (IsOuter) { found_match = true; }
12251226

@@ -1233,7 +1234,7 @@ class open_addressing_ref_impl {
12331234
thrust::reduce(thrust::seq, num_matches, num_matches + bucket_size);
12341235

12351236
int32_t output_idx;
1236-
if (probing_tile.thread_rank() == 0) {
1237+
if (lane_id == 0) {
12371238
auto ref =
12381239
cuda::atomic_ref<int32_t, cuda::thread_scope_block>{counters[flushing_tile_id]};
12391240
output_idx = ref.fetch_add(total_matches, cuda::memory_order_relaxed);
@@ -1244,8 +1245,7 @@ class open_addressing_ref_impl {
12441245
#pragma unroll buffer_size
12451246
for (int32_t i = 0; i < bucket_size; ++i) {
12461247
if (equals[i]) {
1247-
auto const lane_offset =
1248-
detail::count_least_significant_bits(exists[i], probing_tile.thread_rank());
1248+
auto const lane_offset = detail::count_least_significant_bits(exists[i], lane_id);
12491249
buffers[flushing_tile_id][output_idx + matche_offset + lane_offset] = {
12501250
probe_key, bucket_slots[i]};
12511251
}
@@ -1255,7 +1255,7 @@ class open_addressing_ref_impl {
12551255
// Special handling for outer cases where no match is found
12561256
if constexpr (IsOuter) {
12571257
if (!running) {
1258-
if (!found_match and probing_tile.thread_rank() == 0) {
1258+
if (!found_match and lane_id == 0) {
12591259
auto ref =
12601260
cuda::atomic_ref<int32_t, cuda::thread_scope_block>{counters[flushing_tile_id]};
12611261
auto const output_idx = ref.fetch_add(1, cuda::memory_order_relaxed);

0 commit comments

Comments
 (0)