From 86f19721ad58f13151bc84fb0d6de0500014ec29 Mon Sep 17 00:00:00 2001 From: Zihao Zhao Date: Thu, 6 Nov 2025 06:38:12 +0200 Subject: [PATCH] UCT/DEVICE: ring db in atomic way --- src/uct/ib/mlx5/gdaki/gdaki.cuh | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/uct/ib/mlx5/gdaki/gdaki.cuh b/src/uct/ib/mlx5/gdaki/gdaki.cuh index 679ba529fec..84995dffd5f 100644 --- a/src/uct/ib/mlx5/gdaki/gdaki.cuh +++ b/src/uct/ib/mlx5/gdaki/gdaki.cuh @@ -258,24 +258,25 @@ UCS_F_DEVICE void uct_rc_mlx5_gda_db(uct_rc_gdaki_dev_ep_t *ep, { cuda::atomic_ref ref( ep->sq_ready_index); - uint64_t wqe_base_orig = wqe_base; + const uint64_t wqe_next = wqe_base + count; + const bool skip_db = !(flags & UCT_DEVICE_FLAG_NODELAY) && + !((wqe_base ^ wqe_next) & 128); __threadfence(); - while (!ref.compare_exchange_strong(wqe_base, wqe_base + count, - cuda::std::memory_order_relaxed)) { - wqe_base = wqe_base_orig; - } - - if (!(flags & UCT_DEVICE_FLAG_NODELAY) && - !((wqe_base ^ (wqe_base + count)) & 128)) { - return; + if (skip_db) { + const uint64_t wqe_base_orig = wqe_base; + while (!ref.compare_exchange_strong(wqe_base, wqe_next, + cuda::std::memory_order_relaxed)) { + wqe_base = wqe_base_orig; + } + } else { + while (READ_ONCE(ep->sq_ready_index) != wqe_base) { + } + uct_rc_mlx5_gda_ring_db(ep, wqe_next); + uct_rc_mlx5_gda_update_dbr(ep, wqe_next); + uct_rc_mlx5_gda_ring_db(ep, wqe_next); + ref.store(wqe_next, cuda::std::memory_order_release); } - - uct_rc_mlx5_gda_lock(&ep->sq_lock); - uct_rc_mlx5_gda_ring_db(ep, ep->sq_ready_index); - uct_rc_mlx5_gda_update_dbr(ep, ep->sq_ready_index); - uct_rc_mlx5_gda_ring_db(ep, ep->sq_ready_index); - uct_rc_mlx5_gda_unlock(&ep->sq_lock); } UCS_F_DEVICE bool