Skip to content

Commit 7674073

Browse files
lostandy26 authored and Christoph Hellwig committed
nvme-rdma: avoid request double completion for concurrent nvme_rdma_timeout
A crash happens when a long request-completion delay (nearly 30 s) is injected. Each namespace has its own request queue; when such a delay is injected, multiple request queues may have timed-out requests at the same time, so nvme_rdma_timeout can execute concurrently. Requests from different request queues may be queued on the same RDMA queue, so multiple concurrent nvme_rdma_timeout calls may invoke nvme_rdma_stop_queue at the same time. The first nvme_rdma_timeout clears NVME_RDMA_Q_LIVE and continues stopping the RDMA queue (draining the QP), but the others see that NVME_RDMA_Q_LIVE is already cleared and directly complete the requests. Completing a request before the QP is fully drained may lead to a use-after-free condition. Add a mutex to serialize nvme_rdma_stop_queue. Signed-off-by: Chao Leng <[email protected]> Tested-by: Israel Rukshin <[email protected]> Reviewed-by: Israel Rukshin <[email protected]> Signed-off-by: Christoph Hellwig <[email protected]>
1 parent 4d6b1c9 commit 7674073

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

drivers/nvme/host/rdma.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ struct nvme_rdma_queue {
9797
struct completion cm_done;
9898
bool pi_support;
9999
int cq_size;
100+
struct mutex queue_lock;
100101
};
101102

102103
struct nvme_rdma_ctrl {
@@ -579,6 +580,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
579580
int ret;
580581

581582
queue = &ctrl->queues[idx];
583+
mutex_init(&queue->queue_lock);
582584
queue->ctrl = ctrl;
583585
if (idx && ctrl->ctrl.max_integrity_segments)
584586
queue->pi_support = true;
@@ -598,7 +600,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
598600
if (IS_ERR(queue->cm_id)) {
599601
dev_info(ctrl->ctrl.device,
600602
"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
601-
return PTR_ERR(queue->cm_id);
603+
ret = PTR_ERR(queue->cm_id);
604+
goto out_destroy_mutex;
602605
}
603606

604607
if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
@@ -628,6 +631,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
628631
out_destroy_cm_id:
629632
rdma_destroy_id(queue->cm_id);
630633
nvme_rdma_destroy_queue_ib(queue);
634+
out_destroy_mutex:
635+
mutex_destroy(&queue->queue_lock);
631636
return ret;
632637
}
633638

@@ -639,9 +644,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
639644

640645
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
641646
{
642-
if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
643-
return;
644-
__nvme_rdma_stop_queue(queue);
647+
mutex_lock(&queue->queue_lock);
648+
if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
649+
__nvme_rdma_stop_queue(queue);
650+
mutex_unlock(&queue->queue_lock);
645651
}
646652

647653
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
@@ -651,6 +657,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
651657

652658
nvme_rdma_destroy_queue_ib(queue);
653659
rdma_destroy_id(queue->cm_id);
660+
mutex_destroy(&queue->queue_lock);
654661
}
655662

656663
static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)

0 commit comments

Comments
 (0)