Skip to content

Commit c4de97f

Browse files
committed
svcrdma: Handle device removal outside of the CM event handler
Synchronously wait for all disconnects to complete to ensure the transports have divested all hardware resources before the underlying RDMA device can safely be removed.

Reviewed-by: Sagi Grimberg <[email protected]>
Signed-off-by: Chuck Lever <[email protected]>
1 parent 438f81e commit c4de97f

File tree

3 files changed

+40
-1
lines changed

3 files changed

+40
-1
lines changed

include/linux/sunrpc/svc_rdma.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include <linux/sunrpc/rpc_rdma.h>
4949
#include <linux/sunrpc/rpc_rdma_cid.h>
5050
#include <linux/sunrpc/svc_rdma_pcl.h>
51+
#include <linux/sunrpc/rdma_rn.h>
5152

5253
#include <linux/percpu_counter.h>
5354
#include <rdma/ib_verbs.h>
@@ -76,6 +77,7 @@ struct svcxprt_rdma {
7677
struct svc_xprt sc_xprt; /* SVC transport structure */
7778
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
7879
struct list_head sc_accept_q; /* Conn. waiting accept */
80+
struct rpcrdma_notification sc_rn; /* removal notification */
7981
int sc_ord; /* RDMA read limit */
8082
int sc_max_send_sges;
8183
bool sc_snd_w_inv; /* OK to use Send With Invalidate */

include/trace/events/rpcrdma.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2172,6 +2172,29 @@ TRACE_EVENT(svcrdma_qp_error,
21722172
)
21732173
);
21742174

2175+
/*
 * svcrdma_device_removal - trace event taking one RDMA CM ID
 * @id: the rdma_cm_id of the connection being reported
 *
 * Records the name of the CM ID's device (id->device->name) and a
 * sockaddr_in6-sized copy of the CM ID's destination address
 * (id->route.addr.dst_addr), then prints both. Invoked from
 * svc_rdma_xprt_done() just before the transport is closed.
 */
TRACE_EVENT(svcrdma_device_removal,
2176+
TP_PROTO(
2177+
const struct rdma_cm_id *id
2178+
),
2179+
2180+
TP_ARGS(id),
2181+
2182+
TP_STRUCT__entry(
2183+
__string(name, id->device->name)
2184+
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
2185+
),
2186+
2187+
TP_fast_assign(
2188+
__assign_str(name);
2189+
memcpy(__entry->addr, &id->route.addr.dst_addr,
2190+
sizeof(struct sockaddr_in6));
2191+
),
2192+
2193+
TP_printk("device %s to be removed, disconnecting %pISpc\n",
2194+
__get_str(name), __entry->addr
2195+
)
2196+
);
2197+
21752198
DECLARE_EVENT_CLASS(svcrdma_sendqueue_class,
21762199
TP_PROTO(
21772200
const struct svcxprt_rdma *rdma,

net/sunrpc/xprtrdma/svc_rdma_transport.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,6 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
339339
svc_xprt_enqueue(xprt);
340340
break;
341341
case RDMA_CM_EVENT_DISCONNECTED:
342-
case RDMA_CM_EVENT_DEVICE_REMOVAL:
343342
svc_xprt_deferred_close(xprt);
344343
break;
345344
default:
@@ -384,6 +383,16 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
384383
return &cma_xprt->sc_xprt;
385384
}
386385

386+
/*
 * svc_rdma_xprt_done - removal-notification callback for one transport
 * @rn: the sc_rn member embedded in the affected svcxprt_rdma
 *
 * Passed to rpcrdma_rn_register() by svc_rdma_accept(). Recovers the
 * owning transport from @rn via container_of(), emits a
 * svcrdma_device_removal trace event for the transport's CM ID, and
 * then closes the transport with svc_xprt_close().
 */
static void svc_rdma_xprt_done(struct rpcrdma_notification *rn)
387+
{
388+
struct svcxprt_rdma *rdma = container_of(rn, struct svcxprt_rdma,
389+
sc_rn);
390+
struct rdma_cm_id *id = rdma->sc_cm_id;
391+
392+
trace_svcrdma_device_removal(id);
393+
svc_xprt_close(&rdma->sc_xprt);
394+
}
395+
387396
/*
388397
* This is the xpo_recvfrom function for listening endpoints. Its
389398
* purpose is to accept incoming connections. The CMA callback handler
@@ -425,6 +434,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
425434
dev = newxprt->sc_cm_id->device;
426435
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
427436

437+
if (rpcrdma_rn_register(dev, &newxprt->sc_rn, svc_rdma_xprt_done))
438+
goto errout;
439+
428440
newxprt->sc_max_req_size = svcrdma_max_req_size;
429441
newxprt->sc_max_requests = svcrdma_max_requests;
430442
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
@@ -580,6 +592,7 @@ static void __svc_rdma_free(struct work_struct *work)
580592
{
581593
struct svcxprt_rdma *rdma =
582594
container_of(work, struct svcxprt_rdma, sc_work);
595+
struct ib_device *device = rdma->sc_cm_id->device;
583596

584597
/* This blocks until the Completion Queues are empty */
585598
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -608,6 +621,7 @@ static void __svc_rdma_free(struct work_struct *work)
608621
/* Destroy the CM ID */
609622
rdma_destroy_id(rdma->sc_cm_id);
610623

624+
rpcrdma_rn_unregister(device, &rdma->sc_rn);
611625
kfree(rdma);
612626
}
613627

0 commit comments

Comments
 (0)