Skip to content

Commit 25768e8

Browse files
committed
UCT/DEVICE: ring db in atomic way
1 parent 0f0e3ec commit 25768e8

File tree

1 file changed

+16
-15
lines changed

1 file changed

+16
-15
lines changed

src/uct/ib/mlx5/gdaki/gdaki.cuh

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -263,24 +263,25 @@ UCS_F_DEVICE void uct_rc_mlx5_gda_db(uct_rc_gdaki_dev_ep_t *ep,
263263
{
264264
cuda::atomic_ref<uint64_t, cuda::thread_scope_device> ref(
265265
ep->sq_ready_index);
266-
uint64_t wqe_base_orig = wqe_base;
266+
const uint64_t wqe_next = wqe_base + count;
267+
const bool skip_db = !(flags & UCT_DEVICE_FLAG_NODELAY) &&
268+
!((wqe_base ^ wqe_next) & 128);
267269

268270
__threadfence();
269-
while (!ref.compare_exchange_strong(wqe_base, wqe_base + count,
270-
cuda::std::memory_order_relaxed)) {
271-
wqe_base = wqe_base_orig;
272-
}
273-
274-
if (!(flags & UCT_DEVICE_FLAG_NODELAY) &&
275-
!((wqe_base ^ (wqe_base + count)) & 128)) {
276-
return;
271+
if (skip_db) {
272+
const uint64_t wqe_base_orig = wqe_base;
273+
while (!ref.compare_exchange_strong(wqe_base, wqe_next,
274+
cuda::std::memory_order_relaxed)) {
275+
wqe_base = wqe_base_orig;
276+
}
277+
} else {
278+
while (READ_ONCE(ep->sq_ready_index) != wqe_base) {
279+
}
280+
uct_rc_mlx5_gda_ring_db(ep, wqe_next);
281+
uct_rc_mlx5_gda_update_dbr(ep, wqe_next);
282+
uct_rc_mlx5_gda_ring_db(ep, wqe_next);
283+
ref.store(wqe_next, cuda::std::memory_order_release);
277284
}
278-
279-
uct_rc_mlx5_gda_lock(&ep->sq_lock);
280-
uct_rc_mlx5_gda_ring_db(ep, ep->sq_ready_index);
281-
uct_rc_mlx5_gda_update_dbr(ep, ep->sq_ready_index);
282-
uct_rc_mlx5_gda_ring_db(ep, ep->sq_ready_index);
283-
uct_rc_mlx5_gda_unlock(&ep->sq_lock);
284285
}
285286

286287
UCS_F_DEVICE bool

0 commit comments

Comments
 (0)