Skip to content

Commit e084ee6

Browse files
committed
svcrdma: Add Write chunk WRs to the RPC's Send WR chain
Chain RDMA Writes that convey Write chunks onto the local Send chain. This means all WRs for an RPC Reply are now posted with a single ib_post_send() call, and there is a single Send completion when all of these are done. That reduces both the per-transport doorbell rate and completion rate.

Signed-off-by: Chuck Lever <[email protected]>
1 parent d2727ce commit e084ee6

File tree

3 files changed

+78
-26
lines changed

3 files changed

+78
-26
lines changed

include/linux/sunrpc/svc_rdma.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ struct svc_rdma_recv_ctxt {
210210
*/
211211
struct svc_rdma_write_info {
212212
struct svcxprt_rdma *wi_rdma;
213+
struct list_head wi_list;
213214

214215
const struct svc_rdma_chunk *wi_chunk;
215216

@@ -238,7 +239,10 @@ struct svc_rdma_send_ctxt {
238239
struct ib_cqe sc_cqe;
239240
struct xdr_buf sc_hdrbuf;
240241
struct xdr_stream sc_stream;
242+
243+
struct list_head sc_write_info_list;
241244
struct svc_rdma_write_info sc_reply_info;
245+
242246
void *sc_xprt_buf;
243247
int sc_page_count;
244248
int sc_cur_sge_no;
@@ -270,11 +274,14 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
270274
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
271275
struct svc_rdma_chunk_ctxt *cc,
272276
enum dma_data_direction dir);
277+
extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
278+
struct svc_rdma_send_ctxt *ctxt);
273279
extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
274280
struct svc_rdma_send_ctxt *ctxt);
275-
extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
276-
const struct svc_rdma_recv_ctxt *rctxt,
277-
const struct xdr_buf *xdr);
281+
extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
282+
const struct svc_rdma_pcl *write_pcl,
283+
struct svc_rdma_send_ctxt *sctxt,
284+
const struct xdr_buf *xdr);
278285
extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
279286
const struct svc_rdma_pcl *write_pcl,
280287
const struct svc_rdma_pcl *reply_pcl,

net/sunrpc/xprtrdma/svc_rdma_rw.c

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,28 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
230230
queue_work(svcrdma_wq, &info->wi_work);
231231
}
232232

233+
/**
234+
* svc_rdma_write_chunk_release - Release Write chunk I/O resources
235+
* @rdma: controlling transport
236+
* @ctxt: Send context that is being released
237+
*/
238+
void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
239+
struct svc_rdma_send_ctxt *ctxt)
240+
{
241+
struct svc_rdma_write_info *info;
242+
struct svc_rdma_chunk_ctxt *cc;
243+
244+
while (!list_empty(&ctxt->sc_write_info_list)) {
245+
info = list_first_entry(&ctxt->sc_write_info_list,
246+
struct svc_rdma_write_info, wi_list);
247+
list_del(&info->wi_list);
248+
249+
cc = &info->wi_cc;
250+
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
251+
svc_rdma_write_info_free(info);
252+
}
253+
}
254+
233255
/**
234256
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
235257
* @rdma: controlling transport
@@ -286,26 +308,23 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
286308
struct ib_cqe *cqe = wc->wr_cqe;
287309
struct svc_rdma_chunk_ctxt *cc =
288310
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
289-
struct svc_rdma_write_info *info =
290-
container_of(cc, struct svc_rdma_write_info, wi_cc);
291311

292312
switch (wc->status) {
293313
case IB_WC_SUCCESS:
294314
trace_svcrdma_wc_write(&cc->cc_cid);
295-
break;
315+
return;
296316
case IB_WC_WR_FLUSH_ERR:
297317
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
298318
break;
299319
default:
300320
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
301321
}
302322

303-
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
304-
305-
if (unlikely(wc->status != IB_WC_SUCCESS))
306-
svc_xprt_deferred_close(&rdma->sc_xprt);
307-
308-
svc_rdma_write_info_free(info);
323+
/* The RDMA Write has flushed, so the client won't get
324+
* some of the outgoing RPC message. Signal the loss
325+
* to the client by closing the connection.
326+
*/
327+
svc_xprt_deferred_close(&rdma->sc_xprt);
309328
}
310329

311330
/**
@@ -601,13 +620,19 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
601620
return xdr->len;
602621
}
603622

604-
static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
605-
const struct svc_rdma_chunk *chunk,
606-
const struct xdr_buf *xdr)
623+
/* Link Write WRs for @chunk onto @sctxt's WR chain.
624+
*/
625+
static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
626+
struct svc_rdma_send_ctxt *sctxt,
627+
const struct svc_rdma_chunk *chunk,
628+
const struct xdr_buf *xdr)
607629
{
608630
struct svc_rdma_write_info *info;
609631
struct svc_rdma_chunk_ctxt *cc;
632+
struct ib_send_wr *first_wr;
610633
struct xdr_buf payload;
634+
struct list_head *pos;
635+
struct ib_cqe *cqe;
611636
int ret;
612637

613638
if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
@@ -623,10 +648,25 @@ static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
623648
if (ret != payload.len)
624649
goto out_err;
625650

626-
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
627-
ret = svc_rdma_post_chunk_ctxt(rdma, cc);
628-
if (ret < 0)
651+
ret = -EINVAL;
652+
if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
629653
goto out_err;
654+
655+
first_wr = sctxt->sc_wr_chain;
656+
cqe = &cc->cc_cqe;
657+
list_for_each(pos, &cc->cc_rwctxts) {
658+
struct svc_rdma_rw_ctxt *rwc;
659+
660+
rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
661+
first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
662+
rdma->sc_port_num, cqe, first_wr);
663+
cqe = NULL;
664+
}
665+
sctxt->sc_wr_chain = first_wr;
666+
sctxt->sc_sqecount += cc->cc_sqecount;
667+
list_add(&info->wi_list, &sctxt->sc_write_info_list);
668+
669+
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
630670
return 0;
631671

632672
out_err:
@@ -635,25 +675,27 @@ static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
635675
}
636676

637677
/**
638-
* svc_rdma_send_write_list - Send all chunks on the Write list
678+
* svc_rdma_prepare_write_list - Construct WR chain for sending Write list
639679
* @rdma: controlling RDMA transport
640-
* @rctxt: Write list provisioned by the client
680+
* @write_pcl: Write list provisioned by the client
681+
* @sctxt: Send WR resources
641682
* @xdr: xdr_buf containing an RPC Reply message
642683
*
643684
* Returns zero on success, or a negative errno if one or more
644685
* Write chunks could not be prepared.
645686
*/
646-
int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
647-
const struct svc_rdma_recv_ctxt *rctxt,
648-
const struct xdr_buf *xdr)
687+
int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
688+
const struct svc_rdma_pcl *write_pcl,
689+
struct svc_rdma_send_ctxt *sctxt,
690+
const struct xdr_buf *xdr)
649691
{
650692
struct svc_rdma_chunk *chunk;
651693
int ret;
652694

653-
pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
695+
pcl_for_each_chunk(chunk, write_pcl) {
654696
if (!chunk->ch_payload_length)
655697
break;
656-
ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
698+
ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
657699
if (ret < 0)
658700
return ret;
659701
}

net/sunrpc/xprtrdma/svc_rdma_sendto.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
142142
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
143143
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
144144
ctxt->sc_cqe.done = svc_rdma_wc_send;
145+
INIT_LIST_HEAD(&ctxt->sc_write_info_list);
145146
ctxt->sc_xprt_buf = buffer;
146147
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
147148
rdma->sc_max_req_size);
@@ -227,6 +228,7 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
227228
struct ib_device *device = rdma->sc_cm_id->device;
228229
unsigned int i;
229230

231+
svc_rdma_write_chunk_release(rdma, ctxt);
230232
svc_rdma_reply_chunk_release(rdma, ctxt);
231233

232234
if (ctxt->sc_page_count)
@@ -1013,7 +1015,8 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
10131015
if (!p)
10141016
goto put_ctxt;
10151017

1016-
ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
1018+
ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
1019+
&rqstp->rq_res);
10171020
if (ret < 0)
10181021
goto put_ctxt;
10191022

0 commit comments

Comments
 (0)