Skip to content

Commit e28ce90

Browse files
chucklever authored and amschuma-ntap committed
xprtrdma: kmalloc rpcrdma_ep separate from rpcrdma_xprt
Change the rpcrdma_xprt_disconnect() function so that it no longer waits for the DISCONNECTED event. This prevents blocking if the remote is unresponsive.

In rpcrdma_xprt_disconnect(), the transport's rpcrdma_ep is detached. Upon return from rpcrdma_xprt_disconnect(), the transport (r_xprt) is ready immediately for a new connection.

The RDMA_CM_DEVICE_REMOVAL and RDMA_CM_DISCONNECTED events are now handled almost identically.

However, because the lifetimes of rpcrdma_xprt structures and rpcrdma_ep structures are now independent, creating an rpcrdma_ep needs to take a module ref count. The ep now owns most of the hardware resources for a transport.

Also, a kref is needed to ensure that rpcrdma_ep sticks around long enough for the cm_event_handler to finish.

Signed-off-by: Chuck Lever <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
1 parent 745b734 commit e28ce90

File tree

7 files changed

+143
-191
lines changed

7 files changed

+143
-191
lines changed

include/trace/events/rpcrdma.h

Lines changed: 2 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
104104
TP_fast_assign(
105105
__entry->r_xprt = r_xprt;
106106
__entry->rc = rc;
107-
__entry->connect_status = r_xprt->rx_ep.re_connect_status;
107+
__entry->connect_status = r_xprt->rx_ep->re_connect_status;
108108
__assign_str(addr, rpcrdma_addrstr(r_xprt));
109109
__assign_str(port, rpcrdma_portstr(r_xprt));
110110
),
@@ -342,37 +342,6 @@ DECLARE_EVENT_CLASS(xprtrdma_cb_event,
342342
** Connection events
343343
**/
344344

345-
TRACE_EVENT(xprtrdma_cm_event,
346-
TP_PROTO(
347-
const struct rpcrdma_xprt *r_xprt,
348-
struct rdma_cm_event *event
349-
),
350-
351-
TP_ARGS(r_xprt, event),
352-
353-
TP_STRUCT__entry(
354-
__field(const void *, r_xprt)
355-
__field(unsigned int, event)
356-
__field(int, status)
357-
__string(addr, rpcrdma_addrstr(r_xprt))
358-
__string(port, rpcrdma_portstr(r_xprt))
359-
),
360-
361-
TP_fast_assign(
362-
__entry->r_xprt = r_xprt;
363-
__entry->event = event->event;
364-
__entry->status = event->status;
365-
__assign_str(addr, rpcrdma_addrstr(r_xprt));
366-
__assign_str(port, rpcrdma_portstr(r_xprt));
367-
),
368-
369-
TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)",
370-
__get_str(addr), __get_str(port),
371-
__entry->r_xprt, rdma_show_cm_event(__entry->event),
372-
__entry->event, __entry->status
373-
)
374-
);
375-
376345
TRACE_EVENT(xprtrdma_inline_thresh,
377346
TP_PROTO(
378347
const struct rpcrdma_ep *ep
@@ -409,34 +378,6 @@ TRACE_EVENT(xprtrdma_inline_thresh,
409378
)
410379
);
411380

412-
TRACE_EVENT(xprtrdma_remove,
413-
TP_PROTO(
414-
const struct rpcrdma_ep *ep
415-
),
416-
417-
TP_ARGS(ep),
418-
419-
TP_STRUCT__entry(
420-
__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
421-
__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
422-
__string(name, ep->re_id->device->name)
423-
),
424-
425-
TP_fast_assign(
426-
const struct rdma_cm_id *id = ep->re_id;
427-
428-
memcpy(__entry->srcaddr, &id->route.addr.src_addr,
429-
sizeof(struct sockaddr_in6));
430-
memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
431-
sizeof(struct sockaddr_in6));
432-
__assign_str(name, id->device->name);
433-
),
434-
435-
TP_printk("%pISpc -> %pISpc device=%s",
436-
__entry->srcaddr, __entry->dstaddr, __get_str(name)
437-
)
438-
);
439-
440381
DEFINE_CONN_EVENT(connect);
441382
DEFINE_CONN_EVENT(disconnect);
442383
DEFINE_CONN_EVENT(flush_dct);
@@ -831,7 +772,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
831772
__entry->r_xprt = r_xprt;
832773
__entry->count = count;
833774
__entry->status = status;
834-
__entry->posted = r_xprt->rx_ep.re_receive_count;
775+
__entry->posted = r_xprt->rx_ep->re_receive_count;
835776
__assign_str(addr, rpcrdma_addrstr(r_xprt));
836777
__assign_str(port, rpcrdma_portstr(r_xprt));
837778
),

net/sunrpc/xprtrdma/backchannel.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
4444
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
4545
{
4646
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
47-
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
47+
struct rpcrdma_ep *ep = r_xprt->rx_ep;
4848
size_t maxmsg;
4949

5050
maxmsg = min_t(unsigned int, ep->re_inline_send, ep->re_inline_recv);
@@ -190,7 +190,7 @@ static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
190190
if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
191191
return NULL;
192192

193-
size = min_t(size_t, r_xprt->rx_ep.re_inline_recv, PAGE_SIZE);
193+
size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE);
194194
req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
195195
if (!req)
196196
return NULL;

net/sunrpc/xprtrdma/frwr_ops.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr)
7474

7575
if (mr->mr_dir != DMA_NONE) {
7676
trace_xprtrdma_mr_unmap(mr);
77-
ib_dma_unmap_sg(r_xprt->rx_ep.re_id->device,
77+
ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
7878
mr->mr_sg, mr->mr_nents, mr->mr_dir);
7979
mr->mr_dir = DMA_NONE;
8080
}
@@ -115,7 +115,7 @@ void frwr_reset(struct rpcrdma_req *req)
115115
*/
116116
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
117117
{
118-
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
118+
struct rpcrdma_ep *ep = r_xprt->rx_ep;
119119
unsigned int depth = ep->re_max_fr_depth;
120120
struct scatterlist *sg;
121121
struct ib_mr *frmr;
@@ -283,7 +283,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
283283
int nsegs, bool writing, __be32 xid,
284284
struct rpcrdma_mr *mr)
285285
{
286-
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
286+
struct rpcrdma_ep *ep = r_xprt->rx_ep;
287287
struct ib_reg_wr *reg_wr;
288288
int i, n, dma_nents;
289289
struct ib_mr *ibmr;
@@ -405,7 +405,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
405405
post_wr = &frwr->fr_regwr.wr;
406406
}
407407

408-
return ib_post_send(r_xprt->rx_ep.re_id->qp, post_wr, NULL);
408+
return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL);
409409
}
410410

411411
/**
@@ -535,7 +535,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
535535
* unless re_id->qp is a valid pointer.
536536
*/
537537
bad_wr = NULL;
538-
rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr);
538+
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
539539

540540
/* The final LOCAL_INV WR in the chain is supposed to
541541
* do the wake. If it was never posted, the wake will
@@ -640,7 +640,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
640640
* unless re_id->qp is a valid pointer.
641641
*/
642642
bad_wr = NULL;
643-
rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr);
643+
rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
644644
if (!rc)
645645
return;
646646

net/sunrpc/xprtrdma/rpc_rdma.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,10 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
131131
struct rpc_rqst *rqst)
132132
{
133133
struct xdr_buf *xdr = &rqst->rq_snd_buf;
134+
struct rpcrdma_ep *ep = r_xprt->rx_ep;
134135
unsigned int count, remaining, offset;
135136

136-
if (xdr->len > r_xprt->rx_ep.re_max_inline_send)
137+
if (xdr->len > ep->re_max_inline_send)
137138
return false;
138139

139140
if (xdr->page_len) {
@@ -144,7 +145,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
144145
remaining -= min_t(unsigned int,
145146
PAGE_SIZE - offset, remaining);
146147
offset = 0;
147-
if (++count > r_xprt->rx_ep.re_attr.cap.max_send_sge)
148+
if (++count > ep->re_attr.cap.max_send_sge)
148149
return false;
149150
}
150151
}
@@ -161,7 +162,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
161162
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
162163
struct rpc_rqst *rqst)
163164
{
164-
return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.re_max_inline_recv;
165+
return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
165166
}
166167

167168
/* The client is required to provide a Reply chunk if the maximum
@@ -175,7 +176,7 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
175176
const struct xdr_buf *buf = &rqst->rq_rcv_buf;
176177

177178
return (buf->head[0].iov_len + buf->tail[0].iov_len) <
178-
r_xprt->rx_ep.re_max_inline_recv;
179+
r_xprt->rx_ep->re_max_inline_recv;
179180
}
180181

181182
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@ -254,15 +255,15 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
254255
/* When encoding a Read chunk, the tail iovec contains an
255256
* XDR pad and may be omitted.
256257
*/
257-
if (type == rpcrdma_readch && r_xprt->rx_ep.re_implicit_roundup)
258+
if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
258259
goto out;
259260

260261
/* When encoding a Write chunk, some servers need to see an
261262
* extra segment for non-XDR-aligned Write chunks. The upper
262263
* layer provides space in the tail iovec that may be used
263264
* for this purpose.
264265
*/
265-
if (type == rpcrdma_writech && r_xprt->rx_ep.re_implicit_roundup)
266+
if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup)
266267
goto out;
267268

268269
if (xdrbuf->tail[0].iov_len)
@@ -1475,8 +1476,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
14751476

14761477
if (credits == 0)
14771478
credits = 1; /* don't deadlock */
1478-
else if (credits > r_xprt->rx_ep.re_max_requests)
1479-
credits = r_xprt->rx_ep.re_max_requests;
1479+
else if (credits > r_xprt->rx_ep->re_max_requests)
1480+
credits = r_xprt->rx_ep->re_max_requests;
14801481
if (buf->rb_credits != credits)
14811482
rpcrdma_update_cwnd(r_xprt, credits);
14821483
rpcrdma_post_recvs(r_xprt, false);

net/sunrpc/xprtrdma/transport.c

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -238,12 +238,12 @@ xprt_rdma_connect_worker(struct work_struct *work)
238238
struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
239239
rx_connect_worker.work);
240240
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
241-
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
242241
int rc;
243242

244243
rc = rpcrdma_xprt_connect(r_xprt);
245244
xprt_clear_connecting(xprt);
246-
if (ep->re_connect_status > 0) {
245+
if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) {
246+
xprt->connect_cookie++;
247247
xprt->stat.connect_count++;
248248
xprt->stat.connect_time += (long)jiffies -
249249
xprt->stat.connect_start;
@@ -266,7 +266,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
266266
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
267267

268268
trace_xprtrdma_op_inject_dsc(r_xprt);
269-
rdma_disconnect(r_xprt->rx_ep.re_id);
269+
rdma_disconnect(r_xprt->rx_ep->re_id);
270270
}
271271

272272
/**
@@ -316,10 +316,15 @@ xprt_setup_rdma(struct xprt_create *args)
316316
if (args->addrlen > sizeof(xprt->addr))
317317
return ERR_PTR(-EBADF);
318318

319+
if (!try_module_get(THIS_MODULE))
320+
return ERR_PTR(-EIO);
321+
319322
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0,
320323
xprt_rdma_slot_table_entries);
321-
if (!xprt)
324+
if (!xprt) {
325+
module_put(THIS_MODULE);
322326
return ERR_PTR(-ENOMEM);
327+
}
323328

324329
xprt->timeout = &xprt_rdma_default_timeout;
325330
xprt->connect_timeout = xprt->timeout->to_initval;
@@ -348,11 +353,12 @@ xprt_setup_rdma(struct xprt_create *args)
348353

349354
new_xprt = rpcx_to_rdmax(xprt);
350355
rc = rpcrdma_buffer_create(new_xprt);
351-
if (rc)
352-
goto out2;
353-
354-
if (!try_module_get(THIS_MODULE))
355-
goto out4;
356+
if (rc) {
357+
xprt_rdma_free_addresses(xprt);
358+
xprt_free(xprt);
359+
module_put(THIS_MODULE);
360+
return ERR_PTR(rc);
361+
}
356362

357363
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
358364
xprt_rdma_connect_worker);
@@ -364,15 +370,6 @@ xprt_setup_rdma(struct xprt_create *args)
364370
xprt->address_strings[RPC_DISPLAY_PORT]);
365371
trace_xprtrdma_create(new_xprt);
366372
return xprt;
367-
368-
out4:
369-
rpcrdma_buffer_destroy(&new_xprt->rx_buf);
370-
rc = -ENODEV;
371-
out2:
372-
trace_xprtrdma_op_destroy(new_xprt);
373-
xprt_rdma_free_addresses(xprt);
374-
xprt_free(xprt);
375-
return ERR_PTR(rc);
376373
}
377374

378375
/**
@@ -491,11 +488,11 @@ static void
491488
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
492489
{
493490
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
494-
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
491+
struct rpcrdma_ep *ep = r_xprt->rx_ep;
495492
unsigned long delay;
496493

497494
delay = 0;
498-
if (ep->re_connect_status != 0) {
495+
if (ep && ep->re_connect_status != 0) {
499496
delay = xprt_reconnect_delay(xprt);
500497
xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
501498
}

0 commit comments

Comments
 (0)