Skip to content

Commit a032e4f

Browse files
sagigrimberg authored and Christoph Hellwig committed
nvmet-rdma: fix bonding failover possible NULL deref
The RDMA_CM_EVENT_ADDR_CHANGE event occurs in the case of bonding failover on normal as well as on listening cm_ids. Hence this event will immediately trigger a NULL dereference when trying to disconnect a queue for a cm_id that actually belongs to the port. To fix this, we provide a different handler for the listener cm_ids that will defer a work item to disable and (re)enable the port, which essentially destroys and sets up another listener cm_id. Reported-by: Alex Lyakas <[email protected]> Signed-off-by: Sagi Grimberg <[email protected]> Reviewed-by: Max Gurtovoy <[email protected]> Tested-by: Alex Lyakas <[email protected]> Signed-off-by: Christoph Hellwig <[email protected]>
1 parent f0e656e commit a032e4f

File tree

1 file changed

+119
-56
lines changed

1 file changed

+119
-56
lines changed

drivers/nvme/target/rdma.c

Lines changed: 119 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,13 @@ struct nvmet_rdma_queue {
105105
struct list_head queue_list;
106106
};
107107

108+
struct nvmet_rdma_port {
109+
struct nvmet_port *nport;
110+
struct sockaddr_storage addr;
111+
struct rdma_cm_id *cm_id;
112+
struct delayed_work repair_work;
113+
};
114+
108115
struct nvmet_rdma_device {
109116
struct ib_device *device;
110117
struct ib_pd *pd;
@@ -917,7 +924,8 @@ static void nvmet_rdma_free_dev(struct kref *ref)
917924
static struct nvmet_rdma_device *
918925
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
919926
{
920-
struct nvmet_port *port = cm_id->context;
927+
struct nvmet_rdma_port *port = cm_id->context;
928+
struct nvmet_port *nport = port->nport;
921929
struct nvmet_rdma_device *ndev;
922930
int inline_page_count;
923931
int inline_sge_count;
@@ -934,17 +942,17 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
934942
if (!ndev)
935943
goto out_err;
936944

937-
inline_page_count = num_pages(port->inline_data_size);
945+
inline_page_count = num_pages(nport->inline_data_size);
938946
inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
939947
cm_id->device->attrs.max_recv_sge) - 1;
940948
if (inline_page_count > inline_sge_count) {
941949
pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
942-
port->inline_data_size, cm_id->device->name,
950+
nport->inline_data_size, cm_id->device->name,
943951
inline_sge_count * PAGE_SIZE);
944-
port->inline_data_size = inline_sge_count * PAGE_SIZE;
952+
nport->inline_data_size = inline_sge_count * PAGE_SIZE;
945953
inline_page_count = inline_sge_count;
946954
}
947-
ndev->inline_data_size = port->inline_data_size;
955+
ndev->inline_data_size = nport->inline_data_size;
948956
ndev->inline_page_count = inline_page_count;
949957
ndev->device = cm_id->device;
950958
kref_init(&ndev->ref);
@@ -1272,6 +1280,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
12721280
static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
12731281
struct rdma_cm_event *event)
12741282
{
1283+
struct nvmet_rdma_port *port = cm_id->context;
12751284
struct nvmet_rdma_device *ndev;
12761285
struct nvmet_rdma_queue *queue;
12771286
int ret = -EINVAL;
@@ -1287,7 +1296,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
12871296
ret = -ENOMEM;
12881297
goto put_device;
12891298
}
1290-
queue->port = cm_id->context;
1299+
queue->port = port->nport;
12911300

12921301
if (queue->host_qid == 0) {
12931302
/* Let inflight controller teardown complete */
@@ -1412,7 +1421,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
14121421
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
14131422
struct nvmet_rdma_queue *queue)
14141423
{
1415-
struct nvmet_port *port;
1424+
struct nvmet_rdma_port *port;
14161425

14171426
if (queue) {
14181427
/*
@@ -1431,7 +1440,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
14311440
* cm_id destroy. use atomic xchg to make sure
14321441
* we don't compete with remove_port.
14331442
*/
1434-
if (xchg(&port->priv, NULL) != cm_id)
1443+
if (xchg(&port->cm_id, NULL) != cm_id)
14351444
return 0;
14361445

14371446
/*
@@ -1462,6 +1471,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
14621471
nvmet_rdma_queue_established(queue);
14631472
break;
14641473
case RDMA_CM_EVENT_ADDR_CHANGE:
1474+
if (!queue) {
1475+
struct nvmet_rdma_port *port = cm_id->context;
1476+
1477+
schedule_delayed_work(&port->repair_work, 0);
1478+
break;
1479+
}
1480+
/* FALLTHROUGH */
14651481
case RDMA_CM_EVENT_DISCONNECTED:
14661482
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
14671483
nvmet_rdma_queue_disconnect(queue);
@@ -1504,42 +1520,19 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl)
15041520
mutex_unlock(&nvmet_rdma_queue_mutex);
15051521
}
15061522

1507-
static int nvmet_rdma_add_port(struct nvmet_port *port)
1523+
static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
15081524
{
1509-
struct rdma_cm_id *cm_id;
1510-
struct sockaddr_storage addr = { };
1511-
__kernel_sa_family_t af;
1512-
int ret;
1513-
1514-
switch (port->disc_addr.adrfam) {
1515-
case NVMF_ADDR_FAMILY_IP4:
1516-
af = AF_INET;
1517-
break;
1518-
case NVMF_ADDR_FAMILY_IP6:
1519-
af = AF_INET6;
1520-
break;
1521-
default:
1522-
pr_err("address family %d not supported\n",
1523-
port->disc_addr.adrfam);
1524-
return -EINVAL;
1525-
}
1525+
struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);
15261526

1527-
if (port->inline_data_size < 0) {
1528-
port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
1529-
} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
1530-
pr_warn("inline_data_size %u is too large, reducing to %u\n",
1531-
port->inline_data_size,
1532-
NVMET_RDMA_MAX_INLINE_DATA_SIZE);
1533-
port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
1534-
}
1527+
if (cm_id)
1528+
rdma_destroy_id(cm_id);
1529+
}
15351530

1536-
ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
1537-
port->disc_addr.trsvcid, &addr);
1538-
if (ret) {
1539-
pr_err("malformed ip/port passed: %s:%s\n",
1540-
port->disc_addr.traddr, port->disc_addr.trsvcid);
1541-
return ret;
1542-
}
1531+
static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
1532+
{
1533+
struct sockaddr *addr = (struct sockaddr *)&port->addr;
1534+
struct rdma_cm_id *cm_id;
1535+
int ret;
15431536

15441537
cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
15451538
RDMA_PS_TCP, IB_QPT_RC);
@@ -1558,42 +1551,112 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
15581551
goto out_destroy_id;
15591552
}
15601553

1561-
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr);
1554+
ret = rdma_bind_addr(cm_id, addr);
15621555
if (ret) {
1563-
pr_err("binding CM ID to %pISpcs failed (%d)\n",
1564-
(struct sockaddr *)&addr, ret);
1556+
pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret);
15651557
goto out_destroy_id;
15661558
}
15671559

15681560
ret = rdma_listen(cm_id, 128);
15691561
if (ret) {
1570-
pr_err("listening to %pISpcs failed (%d)\n",
1571-
(struct sockaddr *)&addr, ret);
1562+
pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
15721563
goto out_destroy_id;
15731564
}
15741565

1575-
pr_info("enabling port %d (%pISpcs)\n",
1576-
le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
1577-
port->priv = cm_id;
1566+
port->cm_id = cm_id;
15781567
return 0;
15791568

15801569
out_destroy_id:
15811570
rdma_destroy_id(cm_id);
15821571
return ret;
15831572
}
15841573

1585-
static void nvmet_rdma_remove_port(struct nvmet_port *port)
1574+
static void nvmet_rdma_repair_port_work(struct work_struct *w)
15861575
{
1587-
struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
1576+
struct nvmet_rdma_port *port = container_of(to_delayed_work(w),
1577+
struct nvmet_rdma_port, repair_work);
1578+
int ret;
15881579

1589-
if (cm_id)
1590-
rdma_destroy_id(cm_id);
1580+
nvmet_rdma_disable_port(port);
1581+
ret = nvmet_rdma_enable_port(port);
1582+
if (ret)
1583+
schedule_delayed_work(&port->repair_work, 5 * HZ);
1584+
}
1585+
1586+
static int nvmet_rdma_add_port(struct nvmet_port *nport)
1587+
{
1588+
struct nvmet_rdma_port *port;
1589+
__kernel_sa_family_t af;
1590+
int ret;
1591+
1592+
port = kzalloc(sizeof(*port), GFP_KERNEL);
1593+
if (!port)
1594+
return -ENOMEM;
1595+
1596+
nport->priv = port;
1597+
port->nport = nport;
1598+
INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work);
1599+
1600+
switch (nport->disc_addr.adrfam) {
1601+
case NVMF_ADDR_FAMILY_IP4:
1602+
af = AF_INET;
1603+
break;
1604+
case NVMF_ADDR_FAMILY_IP6:
1605+
af = AF_INET6;
1606+
break;
1607+
default:
1608+
pr_err("address family %d not supported\n",
1609+
nport->disc_addr.adrfam);
1610+
ret = -EINVAL;
1611+
goto out_free_port;
1612+
}
1613+
1614+
if (nport->inline_data_size < 0) {
1615+
nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
1616+
} else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
1617+
pr_warn("inline_data_size %u is too large, reducing to %u\n",
1618+
nport->inline_data_size,
1619+
NVMET_RDMA_MAX_INLINE_DATA_SIZE);
1620+
nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
1621+
}
1622+
1623+
ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
1624+
nport->disc_addr.trsvcid, &port->addr);
1625+
if (ret) {
1626+
pr_err("malformed ip/port passed: %s:%s\n",
1627+
nport->disc_addr.traddr, nport->disc_addr.trsvcid);
1628+
goto out_free_port;
1629+
}
1630+
1631+
ret = nvmet_rdma_enable_port(port);
1632+
if (ret)
1633+
goto out_free_port;
1634+
1635+
pr_info("enabling port %d (%pISpcs)\n",
1636+
le16_to_cpu(nport->disc_addr.portid),
1637+
(struct sockaddr *)&port->addr);
1638+
1639+
return 0;
1640+
1641+
out_free_port:
1642+
kfree(port);
1643+
return ret;
1644+
}
1645+
1646+
static void nvmet_rdma_remove_port(struct nvmet_port *nport)
1647+
{
1648+
struct nvmet_rdma_port *port = nport->priv;
1649+
1650+
cancel_delayed_work_sync(&port->repair_work);
1651+
nvmet_rdma_disable_port(port);
1652+
kfree(port);
15911653
}
15921654

15931655
static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
1594-
struct nvmet_port *port, char *traddr)
1656+
struct nvmet_port *nport, char *traddr)
15951657
{
1596-
struct rdma_cm_id *cm_id = port->priv;
1658+
struct nvmet_rdma_port *port = nport->priv;
1659+
struct rdma_cm_id *cm_id = port->cm_id;
15971660

15981661
if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
15991662
struct nvmet_rdma_rsp *rsp =
@@ -1603,7 +1666,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
16031666

16041667
sprintf(traddr, "%pISc", addr);
16051668
} else {
1606-
memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
1669+
memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
16071670
}
16081671
}
16091672

0 commit comments

Comments
 (0)