Skip to content

Commit 614f3c9

Browse files
chucklever and amschuma-ntap
authored and committed
xprtrdma: Pull up sometimes
On some platforms, DMA mapping part of a page is more costly than copying bytes. Restore the pull-up code and use that when we think it's going to be faster. The heuristic for now is to pull up when the size of the RPC message body fits in the buffer underlying the head iovec.

Indeed, not involving the I/O MMU can help the RPC/RDMA transport scale better for tiny I/Os across more RDMA devices. This is because interaction with the I/O MMU is eliminated, as is handling a Send completion, for each of these small I/Os. Without the explicit unmapping, the NIC no longer needs to do a costly internal TLB shootdown for buffers that are just a handful of bytes.

Signed-off-by: Chuck Lever <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
1 parent d6764bb commit 614f3c9

File tree

5 files changed

+85
-7
lines changed

5 files changed

+85
-7
lines changed

include/trace/events/rpcrdma.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,8 @@ DEFINE_WRCH_EVENT(write);
532532
DEFINE_WRCH_EVENT(reply);
533533

534534
TRACE_DEFINE_ENUM(rpcrdma_noch);
535+
TRACE_DEFINE_ENUM(rpcrdma_noch_pullup);
536+
TRACE_DEFINE_ENUM(rpcrdma_noch_mapped);
535537
TRACE_DEFINE_ENUM(rpcrdma_readch);
536538
TRACE_DEFINE_ENUM(rpcrdma_areadch);
537539
TRACE_DEFINE_ENUM(rpcrdma_writech);
@@ -540,6 +542,8 @@ TRACE_DEFINE_ENUM(rpcrdma_replych);
540542
#define xprtrdma_show_chunktype(x) \
541543
__print_symbolic(x, \
542544
{ rpcrdma_noch, "inline" }, \
545+
{ rpcrdma_noch_pullup, "pullup" }, \
546+
{ rpcrdma_noch_mapped, "mapped" }, \
543547
{ rpcrdma_readch, "read list" }, \
544548
{ rpcrdma_areadch, "*read list" }, \
545549
{ rpcrdma_writech, "write list" }, \

net/sunrpc/xprtrdma/backchannel.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
7979
*p = xdr_zero;
8080

8181
if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
82-
&rqst->rq_snd_buf, rpcrdma_noch))
82+
&rqst->rq_snd_buf, rpcrdma_noch_pullup))
8383
return -EIO;
8484

8585
trace_xprtrdma_cb_reply(rqst);

net/sunrpc/xprtrdma/rpc_rdma.c

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
392392
unsigned int pos;
393393
int nsegs;
394394

395-
if (rtype == rpcrdma_noch)
395+
if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
396396
goto done;
397397

398398
pos = rqst->rq_snd_buf.head[0].iov_len;
@@ -691,6 +691,72 @@ static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
691691
return false;
692692
}
693693

694+
/* Copy the tail to the end of the head buffer.
 *
 * The destination is the head iovec's base address advanced by
 * the head length plus @xdr->page_len, i.e. a gap of page_len
 * bytes is left directly after the head. The tail's length in
 * bytes is added to @r_xprt's pullup_copy_count statistic.
 *
 * No bounds check is performed here.
 * NOTE(review): presumably the caller guarantees that the buffer
 * behind the head iovec can hold head + pagelist + tail - confirm.
 *
 * @req is not referenced by this function.
695+
*/
696+
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
697+
struct rpcrdma_req *req,
698+
struct xdr_buf *xdr)
699+
{
700+
unsigned char *dst;
701+
702+
dst = (unsigned char *)xdr->head[0].iov_base;
703+
/* Skip past the head and the space reserved for pagelist data */
dst += xdr->head[0].iov_len + xdr->page_len;
704+
/* memmove() tolerates overlapping source and destination.
 * NOTE(review): presumably needed because the tail can live in
 * the same buffer as the head - confirm.
 */
memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
705+
r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
706+
}
707+
708+
/* Copy pagelist content into the head buffer.
 *
 * @xdr->page_len bytes are copied, starting at byte offset
 * @xdr->page_base within the @xdr->pages array, to the address
 * immediately following the head iovec's current contents.
 * Every byte copied is added to @r_xprt's pullup_copy_count
 * statistic.
 *
 * No bounds check is performed here.
 * NOTE(review): presumably the caller guarantees that the buffer
 * behind the head iovec has room for page_len more bytes - confirm.
 *
 * @req is not referenced by this function.
709+
*/
710+
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
711+
struct rpcrdma_req *req,
712+
struct xdr_buf *xdr)
713+
{
714+
unsigned int len, page_base, remaining;
715+
struct page **ppages;
716+
unsigned char *src, *dst;
717+
718+
dst = (unsigned char *)xdr->head[0].iov_base;
719+
dst += xdr->head[0].iov_len;
720+
/* Locate the first page holding data and the offset within it */
ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
721+
page_base = offset_in_page(xdr->page_base);
722+
remaining = xdr->page_len;
723+
while (remaining) {
724+
/* NOTE(review): page_address() is used without kmap; this
 * looks like it assumes each page has a kernel virtual
 * address (e.g. is not an unmapped highmem page) - confirm.
 */
src = page_address(*ppages);
725+
src += page_base;
726+
/* Copy to the end of this page or the end of the data,
 * whichever comes first.
 */
len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
727+
memcpy(dst, src, len);
728+
r_xprt->rx_stats.pullup_copy_count += len;
729+
730+
ppages++;
731+
dst += len;
732+
remaining -= len;
733+
/* Only the first page is read from a non-zero offset */
page_base = 0;
734+
}
735+
}
736+
737+
/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
738+
* When the head, pagelist, and tail are small, a pull-up copy
739+
* is considerably less costly than DMA mapping the components
740+
* of @xdr.
741+
*
742+
* Assumptions:
743+
* - the caller has already verified that the total length
744+
* of the RPC Call body will fit into @rl_sendbuf.
* - NOTE(review): the copies below write beyond the current end
*   of the head iovec, so this presumably relies on the head
*   iovec being backed by @rl_sendbuf - confirm.
*
* A non-empty tail is relocated first; pagelist content is then
* copied into the gap left between the head and the moved tail.
* NOTE(review): the order presumably matters because the tail's
* original location may overlap the region the pagelist copy
* fills - confirm before reordering.
*
* Returns the result of rpcrdma_prepare_head_iov(), which is
* called with the full message length @xdr->len.
745+
*/
746+
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
747+
struct rpcrdma_req *req,
748+
struct xdr_buf *xdr)
749+
{
750+
if (unlikely(xdr->tail[0].iov_len))
751+
rpcrdma_pullup_tail_iov(r_xprt, req, xdr);
752+
753+
if (unlikely(xdr->page_len))
754+
rpcrdma_pullup_pagelist(r_xprt, req, xdr);
755+
756+
/* The whole RPC message resides in the head iovec now */
757+
return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
758+
}
759+
694760
static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
695761
struct rpcrdma_req *req,
696762
struct xdr_buf *xdr)
@@ -779,7 +845,11 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
779845
goto out_unmap;
780846

781847
switch (rtype) {
782-
case rpcrdma_noch:
848+
case rpcrdma_noch_pullup:
849+
if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
850+
goto out_unmap;
851+
break;
852+
case rpcrdma_noch_mapped:
783853
if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
784854
goto out_unmap;
785855
break;
@@ -827,6 +897,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
827897
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
828898
struct xdr_stream *xdr = &req->rl_stream;
829899
enum rpcrdma_chunktype rtype, wtype;
900+
struct xdr_buf *buf = &rqst->rq_snd_buf;
830901
bool ddp_allowed;
831902
__be32 *p;
832903
int ret;
@@ -884,8 +955,9 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
884955
*/
885956
if (rpcrdma_args_inline(r_xprt, rqst)) {
886957
*p++ = rdma_msg;
887-
rtype = rpcrdma_noch;
888-
} else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
958+
rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
959+
rpcrdma_noch_pullup : rpcrdma_noch_mapped;
960+
} else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
889961
*p++ = rdma_msg;
890962
rtype = rpcrdma_readch;
891963
} else {
@@ -927,7 +999,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
927999
goto out_err;
9281000

9291001
ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
930-
&rqst->rq_snd_buf, rtype);
1002+
buf, rtype);
9311003
if (ret)
9321004
goto out_err;
9331005

net/sunrpc/xprtrdma/verbs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1165,7 +1165,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
11651165
for (i = 0; i < buf->rb_max_requests; i++) {
11661166
struct rpcrdma_req *req;
11671167

1168-
req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE,
1168+
req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2,
11691169
GFP_KERNEL);
11701170
if (!req)
11711171
goto out;

net/sunrpc/xprtrdma/xprt_rdma.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
554554

555555
enum rpcrdma_chunktype {
556556
rpcrdma_noch = 0,
557+
rpcrdma_noch_pullup,
558+
rpcrdma_noch_mapped,
557559
rpcrdma_readch,
558560
rpcrdma_areadch,
559561
rpcrdma_writech,

0 commit comments

Comments
 (0)