File tree Expand file tree Collapse file tree 1 file changed +10
-2
lines changed Expand file tree Collapse file tree 1 file changed +10
-2
lines changed Original file line number Diff line number Diff line change @@ -263,7 +263,7 @@ UCS_F_DEVICE void uct_rc_mlx5_gda_db(uct_rc_gdaki_dev_ep_t *ep,
263263{
264264 cuda::atomic_ref<uint64_t , cuda::thread_scope_device> ref (
265265 ep->sq_ready_index );
266- const uint64_t wqe_num = __ldg (&ep-> sq_wqe_num );
266+ const bool no_delay = (flags & UCT_DEVICE_FLAG_NODELAY );
267267 const uint64_t wqe_next = wqe_base + count;
268268 const uint64_t wqe_base_orig = wqe_base;
269269
@@ -273,9 +273,17 @@ UCS_F_DEVICE void uct_rc_mlx5_gda_db(uct_rc_gdaki_dev_ep_t *ep,
273273 wqe_base = wqe_base_orig;
274274 }
275275
276+ if (no_delay) {
277+ const uint64_t ready_index = ep->sq_ready_index ;
278+ uct_rc_mlx5_gda_ring_db (ep, ready_index);
279+ uct_rc_mlx5_gda_update_dbr (ep, ready_index);
280+ uct_rc_mlx5_gda_ring_db (ep, ready_index);
281+ return ;
282+ }
283+
276284 if (READ_ONCE (ep->sq_ready_index ) == wqe_next) {
277285 uct_rc_mlx5_gda_lock (&ep->sq_lock );
278- const uint64_t ready_index = READ_ONCE ( ep->sq_ready_index ) ;
286+ const uint64_t ready_index = ep->sq_ready_index ;
279287 if (ready_index == wqe_next) {
280288 uct_rc_mlx5_gda_ring_db (ep, ready_index);
281289 uct_rc_mlx5_gda_update_dbr (ep, ready_index);
You can’t perform that action at this time.
0 commit comments