Skip to content

Commit 4680a6b

Browse files
committed
UCT/DEVICE: support no delay
1 parent 1e9db41 commit 4680a6b

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

src/uct/ib/mlx5/gdaki/gdaki.cuh

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,7 @@ UCS_F_DEVICE void uct_rc_mlx5_gda_db(uct_rc_gdaki_dev_ep_t *ep,
263263
{
264264
cuda::atomic_ref<uint64_t, cuda::thread_scope_device> ref(
265265
ep->sq_ready_index);
266-
const uint64_t wqe_num = __ldg(&ep->sq_wqe_num);
266+
const bool no_delay = (flags & UCT_DEVICE_FLAG_NODELAY);
267267
const uint64_t wqe_next = wqe_base + count;
268268
const uint64_t wqe_base_orig = wqe_base;
269269

@@ -273,9 +273,17 @@ UCS_F_DEVICE void uct_rc_mlx5_gda_db(uct_rc_gdaki_dev_ep_t *ep,
273273
wqe_base = wqe_base_orig;
274274
}
275275

276+
if (no_delay) {
277+
const uint64_t ready_index = ep->sq_ready_index;
278+
uct_rc_mlx5_gda_ring_db(ep, ready_index);
279+
uct_rc_mlx5_gda_update_dbr(ep, ready_index);
280+
uct_rc_mlx5_gda_ring_db(ep, ready_index);
281+
return;
282+
}
283+
276284
if (READ_ONCE(ep->sq_ready_index) == wqe_next) {
277285
uct_rc_mlx5_gda_lock(&ep->sq_lock);
278-
const uint64_t ready_index = READ_ONCE(ep->sq_ready_index);
286+
const uint64_t ready_index = ep->sq_ready_index;
279287
if (ready_index == wqe_next) {
280288
uct_rc_mlx5_gda_ring_db(ep, ready_index);
281289
uct_rc_mlx5_gda_update_dbr(ep, ready_index);

0 commit comments

Comments
 (0)