Skip to content

Commit 86f1972

Browse files
committed
UCT/DEVICE: ring db atomically
1 parent 14219b1 commit 86f1972

File tree

1 file changed

+16
-15
lines changed

1 file changed

+16
-15
lines changed

src/uct/ib/mlx5/gdaki/gdaki.cuh

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -258,24 +258,25 @@ UCS_F_DEVICE void uct_rc_mlx5_gda_db(uct_rc_gdaki_dev_ep_t *ep,
258258
{
259259
cuda::atomic_ref<uint64_t, cuda::thread_scope_device> ref(
260260
ep->sq_ready_index);
261-
uint64_t wqe_base_orig = wqe_base;
261+
const uint64_t wqe_next = wqe_base + count;
262+
const bool skip_db = !(flags & UCT_DEVICE_FLAG_NODELAY) &&
263+
!((wqe_base ^ wqe_next) & 128);
262264

263265
__threadfence();
264-
while (!ref.compare_exchange_strong(wqe_base, wqe_base + count,
265-
cuda::std::memory_order_relaxed)) {
266-
wqe_base = wqe_base_orig;
267-
}
268-
269-
if (!(flags & UCT_DEVICE_FLAG_NODELAY) &&
270-
!((wqe_base ^ (wqe_base + count)) & 128)) {
271-
return;
266+
if (skip_db) {
267+
const uint64_t wqe_base_orig = wqe_base;
268+
while (!ref.compare_exchange_strong(wqe_base, wqe_next,
269+
cuda::std::memory_order_relaxed)) {
270+
wqe_base = wqe_base_orig;
271+
}
272+
} else {
273+
while (READ_ONCE(ep->sq_ready_index) != wqe_base) {
274+
}
275+
uct_rc_mlx5_gda_ring_db(ep, wqe_next);
276+
uct_rc_mlx5_gda_update_dbr(ep, wqe_next);
277+
uct_rc_mlx5_gda_ring_db(ep, wqe_next);
278+
ref.store(wqe_next, cuda::std::memory_order_release);
272279
}
273-
274-
uct_rc_mlx5_gda_lock(&ep->sq_lock);
275-
uct_rc_mlx5_gda_ring_db(ep, ep->sq_ready_index);
276-
uct_rc_mlx5_gda_update_dbr(ep, ep->sq_ready_index);
277-
uct_rc_mlx5_gda_ring_db(ep, ep->sq_ready_index);
278-
uct_rc_mlx5_gda_unlock(&ep->sq_lock);
279280
}
280281

281282
UCS_F_DEVICE bool

0 commit comments

Comments
 (0)