Skip to content

Commit 1130a8b

Browse files
committed
Reorder logic in flush_output_buffer.
1 parent b97aa9f commit 1130a8b

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

include/cuco/detail/static_multimap/device_view_impl.inl

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -498,8 +498,13 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
498498
}
499499
offset = g.shfl(offset, 0);
500500

501-
if constexpr (thrust::is_contiguous_iterator_v<OutputIt>) {
502501
#if defined(CUCO_HAS_CG_MEMCPY_ASYNC)
502+
constexpr bool uses_memcpy_async = thrust::is_contiguous_iterator_v<OutputIt>;
503+
#else
504+
constexpr bool uses_memcpy_async = false;
505+
#endif // end CUCO_HAS_CG_MEMCPY_ASYNC
506+
507+
if constexpr (uses_memcpy_async) {
503508
#if defined(CUCO_HAS_CUDA_BARRIER)
504509
cooperative_groups::memcpy_async(
505510
g,
@@ -512,9 +517,9 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
512517
output_buffer,
513518
sizeof(value_type) * num_outputs);
514519
#endif // end CUCO_HAS_CUDA_BARRIER
515-
#endif // end CUCO_HAS_CG_MEMCPY_ASYNC
516520
}
517-
if constexpr (not thrust::is_contiguous_iterator_v<OutputIt>) {
521+
522+
if constexpr (not uses_memcpy_async) {
518523
for (auto index = lane_id; index < num_outputs; index += g.size()) {
519524
*(output_begin + offset + index) = output_buffer[index];
520525
}

0 commit comments

Comments
 (0)