Skip to content

Commit 21f9024

Browse files
Israel Rukshin authored and Christoph Hellwig committed
nvmet-rdma: fix double free of rdma queue
In case rdma accept fails at nvmet_rdma_queue_connect(), release work is scheduled. Later on, a new RDMA CM event may arrive since we didn't destroy the cm-id and call nvmet_rdma_queue_connect_fail(), which schedule another release work. This will cause calling nvmet_rdma_free_queue twice. To fix this we implicitly destroy the cm_id with non-zero ret code, which guarantees that new rdma_cm events will not arrive afterwards. Also add a qp pointer to nvmet_rdma_queue structure, so we can use it when the cm_id pointer is NULL or was destroyed. Signed-off-by: Israel Rukshin <[email protected]> Suggested-by: Sagi Grimberg <[email protected]> Reviewed-by: Max Gurtovoy <[email protected]> Reviewed-by: Sagi Grimberg <[email protected]> Signed-off-by: Christoph Hellwig <[email protected]>
1 parent 8c5c660 commit 21f9024

File tree

1 file changed

+18
-12
lines changed

1 file changed

+18
-12
lines changed

drivers/nvme/target/rdma.c

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state {
7878

7979
struct nvmet_rdma_queue {
8080
struct rdma_cm_id *cm_id;
81+
struct ib_qp *qp;
8182
struct nvmet_port *port;
8283
struct ib_cq *cq;
8384
atomic_t sq_wr_avail;
@@ -474,7 +475,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
474475
if (ndev->srq)
475476
ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
476477
else
477-
ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
478+
ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
478479

479480
if (unlikely(ret))
480481
pr_err("post_recv cmd failed\n");
@@ -513,7 +514,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
513514
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
514515

515516
if (rsp->n_rdma) {
516-
rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
517+
rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
517518
queue->cm_id->port_num, rsp->req.sg,
518519
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
519520
}
@@ -597,7 +598,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
597598

598599
WARN_ON(rsp->n_rdma <= 0);
599600
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
600-
rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
601+
rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
601602
queue->cm_id->port_num, rsp->req.sg,
602603
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
603604
rsp->n_rdma = 0;
@@ -752,7 +753,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
752753
}
753754

754755
if (nvmet_rdma_need_data_in(rsp)) {
755-
if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp,
756+
if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
756757
queue->cm_id->port_num, &rsp->read_cqe, NULL))
757758
nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
758759
} else {
@@ -1038,6 +1039,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
10381039
pr_err("failed to create_qp ret= %d\n", ret);
10391040
goto err_destroy_cq;
10401041
}
1042+
queue->qp = queue->cm_id->qp;
10411043

10421044
atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
10431045

@@ -1066,11 +1068,10 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
10661068

10671069
static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
10681070
{
1069-
struct ib_qp *qp = queue->cm_id->qp;
1070-
1071-
ib_drain_qp(qp);
1072-
rdma_destroy_id(queue->cm_id);
1073-
ib_destroy_qp(qp);
1071+
ib_drain_qp(queue->qp);
1072+
if (queue->cm_id)
1073+
rdma_destroy_id(queue->cm_id);
1074+
ib_destroy_qp(queue->qp);
10741075
ib_free_cq(queue->cq);
10751076
}
10761077

@@ -1305,9 +1306,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
13051306

13061307
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
13071308
if (ret) {
1308-
schedule_work(&queue->release_work);
1309-
/* Destroying rdma_cm id is not needed here */
1310-
return 0;
1309+
/*
1310+
* Don't destroy the cm_id in free path, as we implicitly
1311+
* destroy the cm_id here with non-zero ret code.
1312+
*/
1313+
queue->cm_id = NULL;
1314+
goto free_queue;
13111315
}
13121316

13131317
mutex_lock(&nvmet_rdma_queue_mutex);
@@ -1316,6 +1320,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
13161320

13171321
return 0;
13181322

1323+
free_queue:
1324+
nvmet_rdma_free_queue(queue);
13191325
put_device:
13201326
kref_put(&ndev->ref, nvmet_rdma_free_dev);
13211327

0 commit comments

Comments
 (0)