Skip to content

Commit 71b4353

Browse files
committed
svcrdma: Post Send WR chain
Eventually I'd like the server to post the reply's Send WR along with any Write WRs using only a single call to ib_post_send(), in order to reduce the NIC's doorbell rate.

To do this, add an anchor for a WR chain to svc_rdma_send_ctxt, and refactor svc_rdma_send() to post this WR chain to the Send Queue. For the moment, the posted chain will continue to contain a single Send WR.

Signed-off-by: Chuck Lever <[email protected]>
1 parent fc709d8 commit 71b4353

File tree

3 files changed

+38
-19
lines changed

3 files changed

+38
-19
lines changed

include/linux/sunrpc/svc_rdma.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ struct svc_rdma_send_ctxt {
210210

211211
struct svcxprt_rdma *sc_rdma;
212212
struct ib_send_wr sc_send_wr;
213+
struct ib_send_wr *sc_wr_chain;
214+
int sc_sqecount;
213215
struct ib_cqe sc_cqe;
214216
struct xdr_buf sc_hdrbuf;
215217
struct xdr_stream sc_stream;
@@ -258,8 +260,8 @@ extern struct svc_rdma_send_ctxt *
258260
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
259261
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
260262
struct svc_rdma_send_ctxt *ctxt);
261-
extern int svc_rdma_send(struct svcxprt_rdma *rdma,
262-
struct svc_rdma_send_ctxt *ctxt);
263+
extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
264+
struct svc_rdma_send_ctxt *ctxt);
263265
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
264266
struct svc_rdma_send_ctxt *sctxt,
265267
const struct svc_rdma_pcl *write_pcl,

net/sunrpc/xprtrdma/svc_rdma_backchannel.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
9090
*/
9191
get_page(virt_to_page(rqst->rq_buffer));
9292
sctxt->sc_send_wr.opcode = IB_WR_SEND;
93-
return svc_rdma_send(rdma, sctxt);
93+
return svc_rdma_post_send(rdma, sctxt);
9494
}
9595

9696
/* Server-side transport endpoint wants a whole page for its send

net/sunrpc/xprtrdma/svc_rdma_sendto.c

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,9 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
208208
ctxt->sc_send_wr.num_sge = 0;
209209
ctxt->sc_cur_sge_no = 0;
210210
ctxt->sc_page_count = 0;
211+
ctxt->sc_wr_chain = &ctxt->sc_send_wr;
212+
ctxt->sc_sqecount = 1;
213+
211214
return ctxt;
212215

213216
out_empty:
@@ -293,7 +296,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
293296
struct svc_rdma_send_ctxt *ctxt =
294297
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
295298

296-
svc_rdma_wake_send_waiters(rdma, 1);
299+
svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);
297300

298301
if (unlikely(wc->status != IB_WC_SUCCESS))
299302
goto flushed;
@@ -312,36 +315,44 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
312315
}
313316

314317
/**
315-
* svc_rdma_send - Post a single Send WR
316-
* @rdma: transport on which to post the WR
317-
* @ctxt: send ctxt with a Send WR ready to post
318+
* svc_rdma_post_send - Post a WR chain to the Send Queue
319+
* @rdma: transport context
320+
* @ctxt: WR chain to post
318321
*
319322
* Copy fields in @ctxt to stack variables in order to guarantee
320323
* that these values remain available after the ib_post_send() call.
321324
* In some error flow cases, svc_rdma_wc_send() releases @ctxt.
322325
*
326+
* Note there is potential for starvation when the Send Queue is
327+
* full because there is no order to when waiting threads are
328+
* awoken. The transport is typically provisioned with a deep
329+
* enough Send Queue that SQ exhaustion should be a rare event.
330+
*
323331
* Return values:
324332
* %0: @ctxt's WR chain was posted successfully
325333
* %-ENOTCONN: The connection was lost
326334
*/
327-
int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
335+
int svc_rdma_post_send(struct svcxprt_rdma *rdma,
336+
struct svc_rdma_send_ctxt *ctxt)
328337
{
329-
struct ib_send_wr *wr = &ctxt->sc_send_wr;
338+
struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
339+
struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
340+
const struct ib_send_wr *bad_wr = first_wr;
330341
struct rpc_rdma_cid cid = ctxt->sc_cid;
331-
int ret;
342+
int ret, sqecount = ctxt->sc_sqecount;
332343

333344
might_sleep();
334345

335346
/* Sync the transport header buffer */
336347
ib_dma_sync_single_for_device(rdma->sc_pd->device,
337-
wr->sg_list[0].addr,
338-
wr->sg_list[0].length,
348+
send_wr->sg_list[0].addr,
349+
send_wr->sg_list[0].length,
339350
DMA_TO_DEVICE);
340351

341352
/* If the SQ is full, wait until an SQ entry is available */
342353
while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
343-
if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
344-
svc_rdma_wake_send_waiters(rdma, 1);
354+
if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
355+
svc_rdma_wake_send_waiters(rdma, sqecount);
345356

346357
/* When the transport is torn down, assume
347358
* ib_drain_sq() will trigger enough Send
@@ -358,12 +369,18 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
358369
}
359370

360371
trace_svcrdma_post_send(ctxt);
361-
ret = ib_post_send(rdma->sc_qp, wr, NULL);
372+
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
362373
if (ret) {
363374
trace_svcrdma_sq_post_err(rdma, &cid, ret);
364375
svc_xprt_deferred_close(&rdma->sc_xprt);
365-
svc_rdma_wake_send_waiters(rdma, 1);
366-
break;
376+
377+
/* If even one WR was posted, there will be a
378+
* Send completion that bumps sc_sq_avail.
379+
*/
380+
if (bad_wr == first_wr) {
381+
svc_rdma_wake_send_waiters(rdma, sqecount);
382+
break;
383+
}
367384
}
368385
return 0;
369386
}
@@ -884,7 +901,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
884901
sctxt->sc_send_wr.opcode = IB_WR_SEND;
885902
}
886903

887-
return svc_rdma_send(rdma, sctxt);
904+
return svc_rdma_post_send(rdma, sctxt);
888905
}
889906

890907
/**
@@ -948,7 +965,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
948965
sctxt->sc_send_wr.num_sge = 1;
949966
sctxt->sc_send_wr.opcode = IB_WR_SEND;
950967
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
951-
if (svc_rdma_send(rdma, sctxt))
968+
if (svc_rdma_post_send(rdma, sctxt))
952969
goto put_ctxt;
953970
return;
954971

0 commit comments

Comments
 (0)