Commit de5ed00

Artemy-Mellanox authored and jgunthorpe committed
IB/mlx5: Fix implicit ODP race
The following race may occur because of the call_srcu() and the placement
of the synchronize_srcu() vs the xa_erase():

CPU0                                CPU1

mlx5_ib_free_implicit_mr:           destroy_unused_implicit_child_mr:
 xa_erase(odp_mkeys)
 synchronize_srcu()
                                     xa_lock(implicit_children)
                                     if (still in xarray)
                                        atomic_inc()
                                        call_srcu()
                                     xa_unlock(implicit_children)
 xa_erase(implicit_children):
  xa_lock(implicit_children)
  __xa_erase()
  xa_unlock(implicit_children)

 flush_workqueue()
                                    [..]
                                     free_implicit_child_mr_rcu:
                                      (via call_srcu)
                                       queue_work()
 WARN_ON(atomic_read())
                                    [..]
                                     free_implicit_child_mr_work:
                                      (via wq)
                                       free_implicit_child_mr()
                                        mlx5_mr_cache_invalidate()
                                        mlx5_ib_update_xlt() <-- UMR QP fail
                                        atomic_dec()

The wait_event() solves the race because it blocks until
free_implicit_child_mr_work() completes.

Fixes: 5256edc ("RDMA/mlx5: Rework implicit ODP destroy")
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Artemy Kovalyov <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 817a68a commit de5ed00
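
For context, below is a minimal userspace sketch of the completion pattern the patch switches to: workers drop a counter and wake a waiter when it reaches zero, and the destroyer blocks until the counter is zero instead of flushing a workqueue that may not yet contain the deferred work. Pthreads stand in for the kernel waitqueue; the struct deferred_ctx harness and the two-worker setup are illustrative assumptions, not mlx5 code.

/*
 * Userspace sketch of the fix's pattern; pthread mutex/cond stand in
 * for the kernel waitqueue, and all names here are illustrative.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct deferred_ctx {
	atomic_int num_deferred_work;     /* like mr->num_deferred_work */
	pthread_mutex_t lock;             /* waitqueue-lock stand-in */
	pthread_cond_t q_deferred_work;   /* like mr->q_deferred_work */
};

/* Worker side: the analog of atomic_dec_and_test() + wake_up(). */
static void *deferred_work(void *arg)
{
	struct deferred_ctx *ctx = arg;

	/* ... the deferred teardown would run here ... */
	if (atomic_fetch_sub(&ctx->num_deferred_work, 1) == 1) {
		/* Last piece of work just finished: wake the destroyer. */
		pthread_mutex_lock(&ctx->lock);
		pthread_cond_broadcast(&ctx->q_deferred_work);
		pthread_mutex_unlock(&ctx->lock);
	}
	return NULL;
}

/* Destroyer side: the analog of wait_event(q, !atomic_read(cnt)). */
static void drain_deferred_work(struct deferred_ctx *ctx)
{
	pthread_mutex_lock(&ctx->lock);
	while (atomic_load(&ctx->num_deferred_work) != 0)
		pthread_cond_wait(&ctx->q_deferred_work, &ctx->lock);
	pthread_mutex_unlock(&ctx->lock);
}

int main(void)
{
	struct deferred_ctx ctx = {
		.num_deferred_work = 2,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.q_deferred_work = PTHREAD_COND_INITIALIZER,
	};
	pthread_t workers[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&workers[i], NULL, deferred_work, &ctx);

	/*
	 * Blocks until every queued piece of work has dropped the count,
	 * no matter when it was queued -- unlike flush_workqueue(), which
	 * only covers work already on the queue when it is called.
	 */
	drain_deferred_work(&ctx);

	for (int i = 0; i < 2; i++)
		pthread_join(workers[i], NULL);
	printf("all deferred work drained\n");
	return 0;
}

The property that matters, which wait_event()/wake_up() provide in the kernel version, is that the waiter re-checks the counter under the same lock the waker takes, so a wakeup issued between the check and the sleep cannot be missed.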

File tree

2 files changed: +8 −10 lines changed


drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 1 addition & 0 deletions
@@ -636,6 +636,7 @@ struct mlx5_ib_mr {

 	/* For ODP and implicit */
 	atomic_t num_deferred_work;
+	wait_queue_head_t q_deferred_work;
 	struct xarray implicit_children;
 	union {
 		struct rcu_head rcu;

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 7 additions & 10 deletions
@@ -235,7 +235,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
 	mr->parent = NULL;
 	mlx5_mr_cache_free(mr->dev, mr);
 	ib_umem_odp_release(odp);
-	atomic_dec(&imr->num_deferred_work);
+	if (atomic_dec_and_test(&imr->num_deferred_work))
+		wake_up(&imr->q_deferred_work);
 }

 static void free_implicit_child_mr_work(struct work_struct *work)
@@ -554,6 +555,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	imr->umem = &umem_odp->umem;
 	imr->is_odp_implicit = true;
 	atomic_set(&imr->num_deferred_work, 0);
+	init_waitqueue_head(&imr->q_deferred_work);
 	xa_init(&imr->implicit_children);

 	err = mlx5_ib_update_xlt(imr, 0,
@@ -611,10 +613,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 	 * under xa_lock while the child is in the xarray. Thus at this point
 	 * it is only decreasing, and all work holding it is now on the wq.
 	 */
-	if (atomic_read(&imr->num_deferred_work)) {
-		flush_workqueue(system_unbound_wq);
-		WARN_ON(atomic_read(&imr->num_deferred_work));
-	}
+	wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));

 	/*
 	 * Fence the imr before we destroy the children. This allows us to
@@ -645,10 +644,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
 	/* Wait for all running page-fault handlers to finish. */
 	synchronize_srcu(&mr->dev->odp_srcu);

-	if (atomic_read(&mr->num_deferred_work)) {
-		flush_workqueue(system_unbound_wq);
-		WARN_ON(atomic_read(&mr->num_deferred_work));
-	}
+	wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));

 	dma_fence_odp_mr(mr);
 }
@@ -1720,7 +1716,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work)
 	u32 i;

 	for (i = 0; i < work->num_sge; ++i)
-		atomic_dec(&work->frags[i].mr->num_deferred_work);
+		if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
+			wake_up(&work->frags[i].mr->q_deferred_work);
 	kvfree(work);
 }
